Merge trunk version 227333 into gupc branch.
[official-gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "cfghooks.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "rtl.h"
28 #include "df.h"
29 #include "alias.h"
30 #include "fold-const.h"
31 #include "stringpool.h"
32 #include "attribs.h"
33 #include "calls.h"
34 #include "stor-layout.h"
35 #include "varasm.h"
36 #include "tm_p.h"
37 #include "regs.h"
38 #include "insn-config.h"
39 #include "conditions.h"
40 #include "output.h"
41 #include "insn-codes.h"
42 #include "insn-attr.h"
43 #include "flags.h"
44 #include "except.h"
45 #include "recog.h"
46 #include "expmed.h"
47 #include "dojump.h"
48 #include "explow.h"
49 #include "emit-rtl.h"
50 #include "stmt.h"
51 #include "expr.h"
52 #include "optabs.h"
53 #include "diagnostic-core.h"
54 #include "toplev.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "target.h"
61 #include "common/common-target.h"
62 #include "langhooks.h"
63 #include "reload.h"
64 #include "cgraph.h"
65 #include "internal-fn.h"
66 #include "gimple-fold.h"
67 #include "tree-eh.h"
68 #include "gimplify.h"
69 #include "cfgloop.h"
70 #include "dwarf2.h"
71 #include "tm-constrs.h"
72 #include "params.h"
73 #include "cselib.h"
74 #include "debug.h"
75 #include "sched-int.h"
76 #include "opts.h"
77 #include "diagnostic.h"
78 #include "dumpfile.h"
79 #include "tree-pass.h"
80 #include "context.h"
81 #include "pass_manager.h"
82 #include "target-globals.h"
83 #include "tree-vectorizer.h"
84 #include "shrink-wrap.h"
85 #include "builtins.h"
86 #include "rtl-iter.h"
87 #include "tree-iterator.h"
88 #include "tree-chkp.h"
89 #include "rtl-chkp.h"
91 /* This file should be included last. */
92 #include "target-def.h"
94 static rtx legitimize_dllimport_symbol (rtx, bool);
95 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
96 static rtx legitimize_pe_coff_symbol (rtx, bool);
98 #ifndef CHECK_STACK_LIMIT
99 #define CHECK_STACK_LIMIT (-1)
100 #endif
102 /* Return index of given mode in mult and division cost tables. */
103 #define MODE_INDEX(mode) \
104 ((mode) == QImode ? 0 \
105 : (mode) == HImode ? 1 \
106 : (mode) == SImode ? 2 \
107 : (mode) == DImode ? 3 \
108 : 4)
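/* Illustrative use of this index: the multiply and divide cost tables below
   each have five entries, and the cost computations later in this file select
   one with an expression like cost->mult_init[MODE_INDEX (mode)], so SImode
   maps to slot 2 and any mode other than QI/HI/SI/DImode falls through to the
   "other" slot at index 4.  (mult_init is the multiply-table field of
   struct processor_costs; it is named here only as an example.)  */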
110 /* Processor costs (relative to an add) */
111 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
112 #define COSTS_N_BYTES(N) ((N) * 2)
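/* Worked example of the scaling assumed above: with COSTS_N_INSNS (N)
   expanding to (N) * 4, a one-instruction add costs 4 units, and in the size
   tables COSTS_N_BYTES (2) == 4 gives the 2-byte add the same weight, so when
   tuning for size the entries effectively count bytes on the common scale.  */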
114 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
116 static stringop_algs ix86_size_memcpy[2] = {
117 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
118 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
119 static stringop_algs ix86_size_memset[2] = {
120 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
121 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
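/* A rough guide to reading these tables: each stringop_algs initializer gives
   first the algorithm used when the block size is unknown at compile time,
   then a list of {max, alg, noalign} entries applied to known sizes up to MAX
   bytes, with -1 meaning "any remaining size".  The two array elements appear
   to correspond to 32-bit and 64-bit code, the second slot being
   DUMMY_STRINGOP_ALGS for CPUs that only run 32-bit code.  */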
123 const
124 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
125 COSTS_N_BYTES (2), /* cost of an add instruction */
126 COSTS_N_BYTES (3), /* cost of a lea instruction */
127 COSTS_N_BYTES (2), /* variable shift costs */
128 COSTS_N_BYTES (3), /* constant shift costs */
129 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
130 COSTS_N_BYTES (3), /* HI */
131 COSTS_N_BYTES (3), /* SI */
132 COSTS_N_BYTES (3), /* DI */
133 COSTS_N_BYTES (5)}, /* other */
134 0, /* cost of multiply per each bit set */
135 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
136 COSTS_N_BYTES (3), /* HI */
137 COSTS_N_BYTES (3), /* SI */
138 COSTS_N_BYTES (3), /* DI */
139 COSTS_N_BYTES (5)}, /* other */
140 COSTS_N_BYTES (3), /* cost of movsx */
141 COSTS_N_BYTES (3), /* cost of movzx */
142 0, /* "large" insn */
143 2, /* MOVE_RATIO */
144 2, /* cost for loading QImode using movzbl */
145 {2, 2, 2}, /* cost of loading integer registers
146 in QImode, HImode and SImode.
147 Relative to reg-reg move (2). */
148 {2, 2, 2}, /* cost of storing integer registers */
149 2, /* cost of reg,reg fld/fst */
150 {2, 2, 2}, /* cost of loading fp registers
151 in SFmode, DFmode and XFmode */
152 {2, 2, 2}, /* cost of storing fp registers
153 in SFmode, DFmode and XFmode */
154 3, /* cost of moving MMX register */
155 {3, 3}, /* cost of loading MMX registers
156 in SImode and DImode */
157 {3, 3}, /* cost of storing MMX registers
158 in SImode and DImode */
159 3, /* cost of moving SSE register */
160 {3, 3, 3}, /* cost of loading SSE registers
161 in SImode, DImode and TImode */
162 {3, 3, 3}, /* cost of storing SSE registers
163 in SImode, DImode and TImode */
164 3, /* MMX or SSE register to integer */
165 0, /* size of l1 cache */
166 0, /* size of l2 cache */
167 0, /* size of prefetch block */
168 0, /* number of parallel prefetches */
169 2, /* Branch cost */
170 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
171 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
172 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
173 COSTS_N_BYTES (2), /* cost of FABS instruction. */
174 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
175 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
176 ix86_size_memcpy,
177 ix86_size_memset,
178 1, /* scalar_stmt_cost. */
179 1, /* scalar load_cost. */
180 1, /* scalar_store_cost. */
181 1, /* vec_stmt_cost. */
182 1, /* vec_to_scalar_cost. */
183 1, /* scalar_to_vec_cost. */
184 1, /* vec_align_load_cost. */
185 1, /* vec_unalign_load_cost. */
186 1, /* vec_store_cost. */
187 1, /* cond_taken_branch_cost. */
188 1, /* cond_not_taken_branch_cost. */
189 };
191 /* Processor costs (relative to an add) */
192 static stringop_algs i386_memcpy[2] = {
193 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
194 DUMMY_STRINGOP_ALGS};
195 static stringop_algs i386_memset[2] = {
196 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
197 DUMMY_STRINGOP_ALGS};
199 static const
200 struct processor_costs i386_cost = { /* 386 specific costs */
201 COSTS_N_INSNS (1), /* cost of an add instruction */
202 COSTS_N_INSNS (1), /* cost of a lea instruction */
203 COSTS_N_INSNS (3), /* variable shift costs */
204 COSTS_N_INSNS (2), /* constant shift costs */
205 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
206 COSTS_N_INSNS (6), /* HI */
207 COSTS_N_INSNS (6), /* SI */
208 COSTS_N_INSNS (6), /* DI */
209 COSTS_N_INSNS (6)}, /* other */
210 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
211 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
212 COSTS_N_INSNS (23), /* HI */
213 COSTS_N_INSNS (23), /* SI */
214 COSTS_N_INSNS (23), /* DI */
215 COSTS_N_INSNS (23)}, /* other */
216 COSTS_N_INSNS (3), /* cost of movsx */
217 COSTS_N_INSNS (2), /* cost of movzx */
218 15, /* "large" insn */
219 3, /* MOVE_RATIO */
220 4, /* cost for loading QImode using movzbl */
221 {2, 4, 2}, /* cost of loading integer registers
222 in QImode, HImode and SImode.
223 Relative to reg-reg move (2). */
224 {2, 4, 2}, /* cost of storing integer registers */
225 2, /* cost of reg,reg fld/fst */
226 {8, 8, 8}, /* cost of loading fp registers
227 in SFmode, DFmode and XFmode */
228 {8, 8, 8}, /* cost of storing fp registers
229 in SFmode, DFmode and XFmode */
230 2, /* cost of moving MMX register */
231 {4, 8}, /* cost of loading MMX registers
232 in SImode and DImode */
233 {4, 8}, /* cost of storing MMX registers
234 in SImode and DImode */
235 2, /* cost of moving SSE register */
236 {4, 8, 16}, /* cost of loading SSE registers
237 in SImode, DImode and TImode */
238 {4, 8, 16}, /* cost of storing SSE registers
239 in SImode, DImode and TImode */
240 3, /* MMX or SSE register to integer */
241 0, /* size of l1 cache */
242 0, /* size of l2 cache */
243 0, /* size of prefetch block */
244 0, /* number of parallel prefetches */
245 1, /* Branch cost */
246 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
247 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
248 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
249 COSTS_N_INSNS (22), /* cost of FABS instruction. */
250 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
251 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
252 i386_memcpy,
253 i386_memset,
254 1, /* scalar_stmt_cost. */
255 1, /* scalar load_cost. */
256 1, /* scalar_store_cost. */
257 1, /* vec_stmt_cost. */
258 1, /* vec_to_scalar_cost. */
259 1, /* scalar_to_vec_cost. */
260 1, /* vec_align_load_cost. */
261 2, /* vec_unalign_load_cost. */
262 1, /* vec_store_cost. */
263 3, /* cond_taken_branch_cost. */
264 1, /* cond_not_taken_branch_cost. */
265 };
267 static stringop_algs i486_memcpy[2] = {
268 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
269 DUMMY_STRINGOP_ALGS};
270 static stringop_algs i486_memset[2] = {
271 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
272 DUMMY_STRINGOP_ALGS};
274 static const
275 struct processor_costs i486_cost = { /* 486 specific costs */
276 COSTS_N_INSNS (1), /* cost of an add instruction */
277 COSTS_N_INSNS (1), /* cost of a lea instruction */
278 COSTS_N_INSNS (3), /* variable shift costs */
279 COSTS_N_INSNS (2), /* constant shift costs */
280 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
281 COSTS_N_INSNS (12), /* HI */
282 COSTS_N_INSNS (12), /* SI */
283 COSTS_N_INSNS (12), /* DI */
284 COSTS_N_INSNS (12)}, /* other */
285 1, /* cost of multiply per each bit set */
286 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
287 COSTS_N_INSNS (40), /* HI */
288 COSTS_N_INSNS (40), /* SI */
289 COSTS_N_INSNS (40), /* DI */
290 COSTS_N_INSNS (40)}, /* other */
291 COSTS_N_INSNS (3), /* cost of movsx */
292 COSTS_N_INSNS (2), /* cost of movzx */
293 15, /* "large" insn */
294 3, /* MOVE_RATIO */
295 4, /* cost for loading QImode using movzbl */
296 {2, 4, 2}, /* cost of loading integer registers
297 in QImode, HImode and SImode.
298 Relative to reg-reg move (2). */
299 {2, 4, 2}, /* cost of storing integer registers */
300 2, /* cost of reg,reg fld/fst */
301 {8, 8, 8}, /* cost of loading fp registers
302 in SFmode, DFmode and XFmode */
303 {8, 8, 8}, /* cost of storing fp registers
304 in SFmode, DFmode and XFmode */
305 2, /* cost of moving MMX register */
306 {4, 8}, /* cost of loading MMX registers
307 in SImode and DImode */
308 {4, 8}, /* cost of storing MMX registers
309 in SImode and DImode */
310 2, /* cost of moving SSE register */
311 {4, 8, 16}, /* cost of loading SSE registers
312 in SImode, DImode and TImode */
313 {4, 8, 16}, /* cost of storing SSE registers
314 in SImode, DImode and TImode */
315 3, /* MMX or SSE register to integer */
316 4, /* size of l1 cache. 486 has 8kB cache
317 shared for code and data, so 4kB is
318 not really precise. */
319 4, /* size of l2 cache */
320 0, /* size of prefetch block */
321 0, /* number of parallel prefetches */
322 1, /* Branch cost */
323 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
324 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
325 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
326 COSTS_N_INSNS (3), /* cost of FABS instruction. */
327 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
328 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
329 i486_memcpy,
330 i486_memset,
331 1, /* scalar_stmt_cost. */
332 1, /* scalar load_cost. */
333 1, /* scalar_store_cost. */
334 1, /* vec_stmt_cost. */
335 1, /* vec_to_scalar_cost. */
336 1, /* scalar_to_vec_cost. */
337 1, /* vec_align_load_cost. */
338 2, /* vec_unalign_load_cost. */
339 1, /* vec_store_cost. */
340 3, /* cond_taken_branch_cost. */
341 1, /* cond_not_taken_branch_cost. */
342 };
344 static stringop_algs pentium_memcpy[2] = {
345 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
346 DUMMY_STRINGOP_ALGS};
347 static stringop_algs pentium_memset[2] = {
348 {libcall, {{-1, rep_prefix_4_byte, false}}},
349 DUMMY_STRINGOP_ALGS};
351 static const
352 struct processor_costs pentium_cost = {
353 COSTS_N_INSNS (1), /* cost of an add instruction */
354 COSTS_N_INSNS (1), /* cost of a lea instruction */
355 COSTS_N_INSNS (4), /* variable shift costs */
356 COSTS_N_INSNS (1), /* constant shift costs */
357 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
358 COSTS_N_INSNS (11), /* HI */
359 COSTS_N_INSNS (11), /* SI */
360 COSTS_N_INSNS (11), /* DI */
361 COSTS_N_INSNS (11)}, /* other */
362 0, /* cost of multiply per each bit set */
363 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
364 COSTS_N_INSNS (25), /* HI */
365 COSTS_N_INSNS (25), /* SI */
366 COSTS_N_INSNS (25), /* DI */
367 COSTS_N_INSNS (25)}, /* other */
368 COSTS_N_INSNS (3), /* cost of movsx */
369 COSTS_N_INSNS (2), /* cost of movzx */
370 8, /* "large" insn */
371 6, /* MOVE_RATIO */
372 6, /* cost for loading QImode using movzbl */
373 {2, 4, 2}, /* cost of loading integer registers
374 in QImode, HImode and SImode.
375 Relative to reg-reg move (2). */
376 {2, 4, 2}, /* cost of storing integer registers */
377 2, /* cost of reg,reg fld/fst */
378 {2, 2, 6}, /* cost of loading fp registers
379 in SFmode, DFmode and XFmode */
380 {4, 4, 6}, /* cost of storing fp registers
381 in SFmode, DFmode and XFmode */
382 8, /* cost of moving MMX register */
383 {8, 8}, /* cost of loading MMX registers
384 in SImode and DImode */
385 {8, 8}, /* cost of storing MMX registers
386 in SImode and DImode */
387 2, /* cost of moving SSE register */
388 {4, 8, 16}, /* cost of loading SSE registers
389 in SImode, DImode and TImode */
390 {4, 8, 16}, /* cost of storing SSE registers
391 in SImode, DImode and TImode */
392 3, /* MMX or SSE register to integer */
393 8, /* size of l1 cache. */
394 8, /* size of l2 cache */
395 0, /* size of prefetch block */
396 0, /* number of parallel prefetches */
397 2, /* Branch cost */
398 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
399 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
400 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
401 COSTS_N_INSNS (1), /* cost of FABS instruction. */
402 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
403 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
404 pentium_memcpy,
405 pentium_memset,
406 1, /* scalar_stmt_cost. */
407 1, /* scalar load_cost. */
408 1, /* scalar_store_cost. */
409 1, /* vec_stmt_cost. */
410 1, /* vec_to_scalar_cost. */
411 1, /* scalar_to_vec_cost. */
412 1, /* vec_align_load_cost. */
413 2, /* vec_unalign_load_cost. */
414 1, /* vec_store_cost. */
415 3, /* cond_taken_branch_cost. */
416 1, /* cond_not_taken_branch_cost. */
417 };
419 static const
420 struct processor_costs iamcu_cost = {
421 COSTS_N_INSNS (1), /* cost of an add instruction */
422 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
423 COSTS_N_INSNS (1), /* variable shift costs */
424 COSTS_N_INSNS (1), /* constant shift costs */
425 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
426 COSTS_N_INSNS (11), /* HI */
427 COSTS_N_INSNS (11), /* SI */
428 COSTS_N_INSNS (11), /* DI */
429 COSTS_N_INSNS (11)}, /* other */
430 0, /* cost of multiply per each bit set */
431 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
432 COSTS_N_INSNS (25), /* HI */
433 COSTS_N_INSNS (25), /* SI */
434 COSTS_N_INSNS (25), /* DI */
435 COSTS_N_INSNS (25)}, /* other */
436 COSTS_N_INSNS (3), /* cost of movsx */
437 COSTS_N_INSNS (2), /* cost of movzx */
438 8, /* "large" insn */
439 9, /* MOVE_RATIO */
440 6, /* cost for loading QImode using movzbl */
441 {2, 4, 2}, /* cost of loading integer registers
442 in QImode, HImode and SImode.
443 Relative to reg-reg move (2). */
444 {2, 4, 2}, /* cost of storing integer registers */
445 2, /* cost of reg,reg fld/fst */
446 {2, 2, 6}, /* cost of loading fp registers
447 in SFmode, DFmode and XFmode */
448 {4, 4, 6}, /* cost of storing fp registers
449 in SFmode, DFmode and XFmode */
450 8, /* cost of moving MMX register */
451 {8, 8}, /* cost of loading MMX registers
452 in SImode and DImode */
453 {8, 8}, /* cost of storing MMX registers
454 in SImode and DImode */
455 2, /* cost of moving SSE register */
456 {4, 8, 16}, /* cost of loading SSE registers
457 in SImode, DImode and TImode */
458 {4, 8, 16}, /* cost of storing SSE registers
459 in SImode, DImode and TImode */
460 3, /* MMX or SSE register to integer */
461 8, /* size of l1 cache. */
462 8, /* size of l2 cache */
463 0, /* size of prefetch block */
464 0, /* number of parallel prefetches */
465 2, /* Branch cost */
466 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
467 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
468 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
469 COSTS_N_INSNS (1), /* cost of FABS instruction. */
470 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
471 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
472 pentium_memcpy,
473 pentium_memset,
474 1, /* scalar_stmt_cost. */
475 1, /* scalar load_cost. */
476 1, /* scalar_store_cost. */
477 1, /* vec_stmt_cost. */
478 1, /* vec_to_scalar_cost. */
479 1, /* scalar_to_vec_cost. */
480 1, /* vec_align_load_cost. */
481 2, /* vec_unalign_load_cost. */
482 1, /* vec_store_cost. */
483 3, /* cond_taken_branch_cost. */
484 1, /* cond_not_taken_branch_cost. */
485 };
487 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
488 (we ensure the alignment). For small blocks an inline loop is still a
489 noticeable win; for bigger blocks either rep movsl or rep movsb is the
490 way to go. Rep movsb apparently has a more expensive startup time in the
491 CPU, but after 4K the difference is down in the noise. */
492 static stringop_algs pentiumpro_memcpy[2] = {
493 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
494 {8192, rep_prefix_4_byte, false},
495 {-1, rep_prefix_1_byte, false}}},
496 DUMMY_STRINGOP_ALGS};
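/* Read as a sketch of the policy described above: with an unknown size,
   PentiumPro memcpy uses rep movsl; for known sizes it uses an inline loop up
   to 128 bytes, an unrolled loop up to 1024 bytes, rep movsl up to 8192
   bytes, and rep movsb beyond that.  The second element is a dummy,
   presumably because this table is only consulted for 32-bit code.  */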
497 static stringop_algs pentiumpro_memset[2] = {
498 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
499 {8192, rep_prefix_4_byte, false},
500 {-1, libcall, false}}},
501 DUMMY_STRINGOP_ALGS};
502 static const
503 struct processor_costs pentiumpro_cost = {
504 COSTS_N_INSNS (1), /* cost of an add instruction */
505 COSTS_N_INSNS (1), /* cost of a lea instruction */
506 COSTS_N_INSNS (1), /* variable shift costs */
507 COSTS_N_INSNS (1), /* constant shift costs */
508 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
509 COSTS_N_INSNS (4), /* HI */
510 COSTS_N_INSNS (4), /* SI */
511 COSTS_N_INSNS (4), /* DI */
512 COSTS_N_INSNS (4)}, /* other */
513 0, /* cost of multiply per each bit set */
514 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
515 COSTS_N_INSNS (17), /* HI */
516 COSTS_N_INSNS (17), /* SI */
517 COSTS_N_INSNS (17), /* DI */
518 COSTS_N_INSNS (17)}, /* other */
519 COSTS_N_INSNS (1), /* cost of movsx */
520 COSTS_N_INSNS (1), /* cost of movzx */
521 8, /* "large" insn */
522 6, /* MOVE_RATIO */
523 2, /* cost for loading QImode using movzbl */
524 {4, 4, 4}, /* cost of loading integer registers
525 in QImode, HImode and SImode.
526 Relative to reg-reg move (2). */
527 {2, 2, 2}, /* cost of storing integer registers */
528 2, /* cost of reg,reg fld/fst */
529 {2, 2, 6}, /* cost of loading fp registers
530 in SFmode, DFmode and XFmode */
531 {4, 4, 6}, /* cost of storing fp registers
532 in SFmode, DFmode and XFmode */
533 2, /* cost of moving MMX register */
534 {2, 2}, /* cost of loading MMX registers
535 in SImode and DImode */
536 {2, 2}, /* cost of storing MMX registers
537 in SImode and DImode */
538 2, /* cost of moving SSE register */
539 {2, 2, 8}, /* cost of loading SSE registers
540 in SImode, DImode and TImode */
541 {2, 2, 8}, /* cost of storing SSE registers
542 in SImode, DImode and TImode */
543 3, /* MMX or SSE register to integer */
544 8, /* size of l1 cache. */
545 256, /* size of l2 cache */
546 32, /* size of prefetch block */
547 6, /* number of parallel prefetches */
548 2, /* Branch cost */
549 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
550 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
551 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
552 COSTS_N_INSNS (2), /* cost of FABS instruction. */
553 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
554 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
555 pentiumpro_memcpy,
556 pentiumpro_memset,
557 1, /* scalar_stmt_cost. */
558 1, /* scalar load_cost. */
559 1, /* scalar_store_cost. */
560 1, /* vec_stmt_cost. */
561 1, /* vec_to_scalar_cost. */
562 1, /* scalar_to_vec_cost. */
563 1, /* vec_align_load_cost. */
564 2, /* vec_unalign_load_cost. */
565 1, /* vec_store_cost. */
566 3, /* cond_taken_branch_cost. */
567 1, /* cond_not_taken_branch_cost. */
568 };
570 static stringop_algs geode_memcpy[2] = {
571 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
572 DUMMY_STRINGOP_ALGS};
573 static stringop_algs geode_memset[2] = {
574 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
575 DUMMY_STRINGOP_ALGS};
576 static const
577 struct processor_costs geode_cost = {
578 COSTS_N_INSNS (1), /* cost of an add instruction */
579 COSTS_N_INSNS (1), /* cost of a lea instruction */
580 COSTS_N_INSNS (2), /* variable shift costs */
581 COSTS_N_INSNS (1), /* constant shift costs */
582 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
583 COSTS_N_INSNS (4), /* HI */
584 COSTS_N_INSNS (7), /* SI */
585 COSTS_N_INSNS (7), /* DI */
586 COSTS_N_INSNS (7)}, /* other */
587 0, /* cost of multiply per each bit set */
588 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
589 COSTS_N_INSNS (23), /* HI */
590 COSTS_N_INSNS (39), /* SI */
591 COSTS_N_INSNS (39), /* DI */
592 COSTS_N_INSNS (39)}, /* other */
593 COSTS_N_INSNS (1), /* cost of movsx */
594 COSTS_N_INSNS (1), /* cost of movzx */
595 8, /* "large" insn */
596 4, /* MOVE_RATIO */
597 1, /* cost for loading QImode using movzbl */
598 {1, 1, 1}, /* cost of loading integer registers
599 in QImode, HImode and SImode.
600 Relative to reg-reg move (2). */
601 {1, 1, 1}, /* cost of storing integer registers */
602 1, /* cost of reg,reg fld/fst */
603 {1, 1, 1}, /* cost of loading fp registers
604 in SFmode, DFmode and XFmode */
605 {4, 6, 6}, /* cost of storing fp registers
606 in SFmode, DFmode and XFmode */
608 1, /* cost of moving MMX register */
609 {1, 1}, /* cost of loading MMX registers
610 in SImode and DImode */
611 {1, 1}, /* cost of storing MMX registers
612 in SImode and DImode */
613 1, /* cost of moving SSE register */
614 {1, 1, 1}, /* cost of loading SSE registers
615 in SImode, DImode and TImode */
616 {1, 1, 1}, /* cost of storing SSE registers
617 in SImode, DImode and TImode */
618 1, /* MMX or SSE register to integer */
619 64, /* size of l1 cache. */
620 128, /* size of l2 cache. */
621 32, /* size of prefetch block */
622 1, /* number of parallel prefetches */
623 1, /* Branch cost */
624 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
625 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
626 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
627 COSTS_N_INSNS (1), /* cost of FABS instruction. */
628 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
629 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
630 geode_memcpy,
631 geode_memset,
632 1, /* scalar_stmt_cost. */
633 1, /* scalar load_cost. */
634 1, /* scalar_store_cost. */
635 1, /* vec_stmt_cost. */
636 1, /* vec_to_scalar_cost. */
637 1, /* scalar_to_vec_cost. */
638 1, /* vec_align_load_cost. */
639 2, /* vec_unalign_load_cost. */
640 1, /* vec_store_cost. */
641 3, /* cond_taken_branch_cost. */
642 1, /* cond_not_taken_branch_cost. */
643 };
645 static stringop_algs k6_memcpy[2] = {
646 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
647 DUMMY_STRINGOP_ALGS};
648 static stringop_algs k6_memset[2] = {
649 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
650 DUMMY_STRINGOP_ALGS};
651 static const
652 struct processor_costs k6_cost = {
653 COSTS_N_INSNS (1), /* cost of an add instruction */
654 COSTS_N_INSNS (2), /* cost of a lea instruction */
655 COSTS_N_INSNS (1), /* variable shift costs */
656 COSTS_N_INSNS (1), /* constant shift costs */
657 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
658 COSTS_N_INSNS (3), /* HI */
659 COSTS_N_INSNS (3), /* SI */
660 COSTS_N_INSNS (3), /* DI */
661 COSTS_N_INSNS (3)}, /* other */
662 0, /* cost of multiply per each bit set */
663 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
664 COSTS_N_INSNS (18), /* HI */
665 COSTS_N_INSNS (18), /* SI */
666 COSTS_N_INSNS (18), /* DI */
667 COSTS_N_INSNS (18)}, /* other */
668 COSTS_N_INSNS (2), /* cost of movsx */
669 COSTS_N_INSNS (2), /* cost of movzx */
670 8, /* "large" insn */
671 4, /* MOVE_RATIO */
672 3, /* cost for loading QImode using movzbl */
673 {4, 5, 4}, /* cost of loading integer registers
674 in QImode, HImode and SImode.
675 Relative to reg-reg move (2). */
676 {2, 3, 2}, /* cost of storing integer registers */
677 4, /* cost of reg,reg fld/fst */
678 {6, 6, 6}, /* cost of loading fp registers
679 in SFmode, DFmode and XFmode */
680 {4, 4, 4}, /* cost of storing fp registers
681 in SFmode, DFmode and XFmode */
682 2, /* cost of moving MMX register */
683 {2, 2}, /* cost of loading MMX registers
684 in SImode and DImode */
685 {2, 2}, /* cost of storing MMX registers
686 in SImode and DImode */
687 2, /* cost of moving SSE register */
688 {2, 2, 8}, /* cost of loading SSE registers
689 in SImode, DImode and TImode */
690 {2, 2, 8}, /* cost of storing SSE registers
691 in SImode, DImode and TImode */
692 6, /* MMX or SSE register to integer */
693 32, /* size of l1 cache. */
694 32, /* size of l2 cache. Some models
695 have integrated l2 cache, but
696 optimizing for k6 is not important
697 enough to worry about that. */
698 32, /* size of prefetch block */
699 1, /* number of parallel prefetches */
700 1, /* Branch cost */
701 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
702 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
703 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
704 COSTS_N_INSNS (2), /* cost of FABS instruction. */
705 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
706 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
707 k6_memcpy,
708 k6_memset,
709 1, /* scalar_stmt_cost. */
710 1, /* scalar load_cost. */
711 1, /* scalar_store_cost. */
712 1, /* vec_stmt_cost. */
713 1, /* vec_to_scalar_cost. */
714 1, /* scalar_to_vec_cost. */
715 1, /* vec_align_load_cost. */
716 2, /* vec_unalign_load_cost. */
717 1, /* vec_store_cost. */
718 3, /* cond_taken_branch_cost. */
719 1, /* cond_not_taken_branch_cost. */
720 };
722 /* For some reason, Athlon deals better with the REP prefix (relative to
723 loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
724 and 128 bytes for memset. */
725 static stringop_algs athlon_memcpy[2] = {
726 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
727 DUMMY_STRINGOP_ALGS};
728 static stringop_algs athlon_memset[2] = {
729 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
730 DUMMY_STRINGOP_ALGS};
731 static const
732 struct processor_costs athlon_cost = {
733 COSTS_N_INSNS (1), /* cost of an add instruction */
734 COSTS_N_INSNS (2), /* cost of a lea instruction */
735 COSTS_N_INSNS (1), /* variable shift costs */
736 COSTS_N_INSNS (1), /* constant shift costs */
737 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
738 COSTS_N_INSNS (5), /* HI */
739 COSTS_N_INSNS (5), /* SI */
740 COSTS_N_INSNS (5), /* DI */
741 COSTS_N_INSNS (5)}, /* other */
742 0, /* cost of multiply per each bit set */
743 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
744 COSTS_N_INSNS (26), /* HI */
745 COSTS_N_INSNS (42), /* SI */
746 COSTS_N_INSNS (74), /* DI */
747 COSTS_N_INSNS (74)}, /* other */
748 COSTS_N_INSNS (1), /* cost of movsx */
749 COSTS_N_INSNS (1), /* cost of movzx */
750 8, /* "large" insn */
751 9, /* MOVE_RATIO */
752 4, /* cost for loading QImode using movzbl */
753 {3, 4, 3}, /* cost of loading integer registers
754 in QImode, HImode and SImode.
755 Relative to reg-reg move (2). */
756 {3, 4, 3}, /* cost of storing integer registers */
757 4, /* cost of reg,reg fld/fst */
758 {4, 4, 12}, /* cost of loading fp registers
759 in SFmode, DFmode and XFmode */
760 {6, 6, 8}, /* cost of storing fp registers
761 in SFmode, DFmode and XFmode */
762 2, /* cost of moving MMX register */
763 {4, 4}, /* cost of loading MMX registers
764 in SImode and DImode */
765 {4, 4}, /* cost of storing MMX registers
766 in SImode and DImode */
767 2, /* cost of moving SSE register */
768 {4, 4, 6}, /* cost of loading SSE registers
769 in SImode, DImode and TImode */
770 {4, 4, 5}, /* cost of storing SSE registers
771 in SImode, DImode and TImode */
772 5, /* MMX or SSE register to integer */
773 64, /* size of l1 cache. */
774 256, /* size of l2 cache. */
775 64, /* size of prefetch block */
776 6, /* number of parallel prefetches */
777 5, /* Branch cost */
778 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
779 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
780 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
781 COSTS_N_INSNS (2), /* cost of FABS instruction. */
782 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
783 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
784 athlon_memcpy,
785 athlon_memset,
786 1, /* scalar_stmt_cost. */
787 1, /* scalar load_cost. */
788 1, /* scalar_store_cost. */
789 1, /* vec_stmt_cost. */
790 1, /* vec_to_scalar_cost. */
791 1, /* scalar_to_vec_cost. */
792 1, /* vec_align_load_cost. */
793 2, /* vec_unalign_load_cost. */
794 1, /* vec_store_cost. */
795 3, /* cond_taken_branch_cost. */
796 1, /* cond_not_taken_branch_cost. */
797 };
799 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
800 small blocks a loop is better. For large blocks, the libcall can
801 do non-temporal accesses and beat inlining considerably. */
802 static stringop_algs k8_memcpy[2] = {
803 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
804 {-1, rep_prefix_4_byte, false}}},
805 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
806 {-1, libcall, false}}}};
807 static stringop_algs k8_memset[2] = {
808 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
809 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
810 {libcall, {{48, unrolled_loop, false},
811 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
812 static const
813 struct processor_costs k8_cost = {
814 COSTS_N_INSNS (1), /* cost of an add instruction */
815 COSTS_N_INSNS (2), /* cost of a lea instruction */
816 COSTS_N_INSNS (1), /* variable shift costs */
817 COSTS_N_INSNS (1), /* constant shift costs */
818 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
819 COSTS_N_INSNS (4), /* HI */
820 COSTS_N_INSNS (3), /* SI */
821 COSTS_N_INSNS (4), /* DI */
822 COSTS_N_INSNS (5)}, /* other */
823 0, /* cost of multiply per each bit set */
824 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
825 COSTS_N_INSNS (26), /* HI */
826 COSTS_N_INSNS (42), /* SI */
827 COSTS_N_INSNS (74), /* DI */
828 COSTS_N_INSNS (74)}, /* other */
829 COSTS_N_INSNS (1), /* cost of movsx */
830 COSTS_N_INSNS (1), /* cost of movzx */
831 8, /* "large" insn */
832 9, /* MOVE_RATIO */
833 4, /* cost for loading QImode using movzbl */
834 {3, 4, 3}, /* cost of loading integer registers
835 in QImode, HImode and SImode.
836 Relative to reg-reg move (2). */
837 {3, 4, 3}, /* cost of storing integer registers */
838 4, /* cost of reg,reg fld/fst */
839 {4, 4, 12}, /* cost of loading fp registers
840 in SFmode, DFmode and XFmode */
841 {6, 6, 8}, /* cost of storing fp registers
842 in SFmode, DFmode and XFmode */
843 2, /* cost of moving MMX register */
844 {3, 3}, /* cost of loading MMX registers
845 in SImode and DImode */
846 {4, 4}, /* cost of storing MMX registers
847 in SImode and DImode */
848 2, /* cost of moving SSE register */
849 {4, 3, 6}, /* cost of loading SSE registers
850 in SImode, DImode and TImode */
851 {4, 4, 5}, /* cost of storing SSE registers
852 in SImode, DImode and TImode */
853 5, /* MMX or SSE register to integer */
854 64, /* size of l1 cache. */
855 512, /* size of l2 cache. */
856 64, /* size of prefetch block */
857 /* New AMD processors never drop prefetches; if they cannot be performed
858 immediately, they are queued. We set the number of simultaneous prefetches
859 to a large constant to reflect this (it is probably not a good idea to
860 leave the number of prefetches completely unlimited, as their execution
861 also takes some time). */
862 100, /* number of parallel prefetches */
863 3, /* Branch cost */
864 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
865 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
866 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
867 COSTS_N_INSNS (2), /* cost of FABS instruction. */
868 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
869 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
871 k8_memcpy,
872 k8_memset,
873 4, /* scalar_stmt_cost. */
874 2, /* scalar load_cost. */
875 2, /* scalar_store_cost. */
876 5, /* vec_stmt_cost. */
877 0, /* vec_to_scalar_cost. */
878 2, /* scalar_to_vec_cost. */
879 2, /* vec_align_load_cost. */
880 3, /* vec_unalign_load_cost. */
881 3, /* vec_store_cost. */
882 3, /* cond_taken_branch_cost. */
883 2, /* cond_not_taken_branch_cost. */
884 };
886 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
887 very small blocks a loop is better. For large blocks, the libcall can
888 do non-temporal accesses and beat inlining considerably. */
889 static stringop_algs amdfam10_memcpy[2] = {
890 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
891 {-1, rep_prefix_4_byte, false}}},
892 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
893 {-1, libcall, false}}}};
894 static stringop_algs amdfam10_memset[2] = {
895 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
896 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
897 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
898 {-1, libcall, false}}}};
899 struct processor_costs amdfam10_cost = {
900 COSTS_N_INSNS (1), /* cost of an add instruction */
901 COSTS_N_INSNS (2), /* cost of a lea instruction */
902 COSTS_N_INSNS (1), /* variable shift costs */
903 COSTS_N_INSNS (1), /* constant shift costs */
904 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
905 COSTS_N_INSNS (4), /* HI */
906 COSTS_N_INSNS (3), /* SI */
907 COSTS_N_INSNS (4), /* DI */
908 COSTS_N_INSNS (5)}, /* other */
909 0, /* cost of multiply per each bit set */
910 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
911 COSTS_N_INSNS (35), /* HI */
912 COSTS_N_INSNS (51), /* SI */
913 COSTS_N_INSNS (83), /* DI */
914 COSTS_N_INSNS (83)}, /* other */
915 COSTS_N_INSNS (1), /* cost of movsx */
916 COSTS_N_INSNS (1), /* cost of movzx */
917 8, /* "large" insn */
918 9, /* MOVE_RATIO */
919 4, /* cost for loading QImode using movzbl */
920 {3, 4, 3}, /* cost of loading integer registers
921 in QImode, HImode and SImode.
922 Relative to reg-reg move (2). */
923 {3, 4, 3}, /* cost of storing integer registers */
924 4, /* cost of reg,reg fld/fst */
925 {4, 4, 12}, /* cost of loading fp registers
926 in SFmode, DFmode and XFmode */
927 {6, 6, 8}, /* cost of storing fp registers
928 in SFmode, DFmode and XFmode */
929 2, /* cost of moving MMX register */
930 {3, 3}, /* cost of loading MMX registers
931 in SImode and DImode */
932 {4, 4}, /* cost of storing MMX registers
933 in SImode and DImode */
934 2, /* cost of moving SSE register */
935 {4, 4, 3}, /* cost of loading SSE registers
936 in SImode, DImode and TImode */
937 {4, 4, 5}, /* cost of storing SSE registers
938 in SImode, DImode and TImode */
939 3, /* MMX or SSE register to integer */
940 /* On K8:
941 MOVD reg64, xmmreg Double FSTORE 4
942 MOVD reg32, xmmreg Double FSTORE 4
943 On AMDFAM10:
944 MOVD reg64, xmmreg Double FADD 3
945 1/1 1/1
946 MOVD reg32, xmmreg Double FADD 3
947 1/1 1/1 */
948 64, /* size of l1 cache. */
949 512, /* size of l2 cache. */
950 64, /* size of prefetch block */
951 /* New AMD processors never drop prefetches; if they cannot be performed
952 immediately, they are queued. We set the number of simultaneous prefetches
953 to a large constant to reflect this (it is probably not a good idea to
954 leave the number of prefetches completely unlimited, as their execution
955 also takes some time). */
956 100, /* number of parallel prefetches */
957 2, /* Branch cost */
958 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
959 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
960 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
961 COSTS_N_INSNS (2), /* cost of FABS instruction. */
962 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
963 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
965 amdfam10_memcpy,
966 amdfam10_memset,
967 4, /* scalar_stmt_cost. */
968 2, /* scalar load_cost. */
969 2, /* scalar_store_cost. */
970 6, /* vec_stmt_cost. */
971 0, /* vec_to_scalar_cost. */
972 2, /* scalar_to_vec_cost. */
973 2, /* vec_align_load_cost. */
974 2, /* vec_unalign_load_cost. */
975 2, /* vec_store_cost. */
976 2, /* cond_taken_branch_cost. */
977 1, /* cond_not_taken_branch_cost. */
978 };
980 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
981 very small blocks a loop is better. For large blocks, the libcall
982 can do non-temporal accesses and beat inlining considerably. */
983 static stringop_algs bdver1_memcpy[2] = {
984 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
985 {-1, rep_prefix_4_byte, false}}},
986 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
987 {-1, libcall, false}}}};
988 static stringop_algs bdver1_memset[2] = {
989 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
990 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
991 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
992 {-1, libcall, false}}}};
994 const struct processor_costs bdver1_cost = {
995 COSTS_N_INSNS (1), /* cost of an add instruction */
996 COSTS_N_INSNS (1), /* cost of a lea instruction */
997 COSTS_N_INSNS (1), /* variable shift costs */
998 COSTS_N_INSNS (1), /* constant shift costs */
999 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1000 COSTS_N_INSNS (4), /* HI */
1001 COSTS_N_INSNS (4), /* SI */
1002 COSTS_N_INSNS (6), /* DI */
1003 COSTS_N_INSNS (6)}, /* other */
1004 0, /* cost of multiply per each bit set */
1005 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1006 COSTS_N_INSNS (35), /* HI */
1007 COSTS_N_INSNS (51), /* SI */
1008 COSTS_N_INSNS (83), /* DI */
1009 COSTS_N_INSNS (83)}, /* other */
1010 COSTS_N_INSNS (1), /* cost of movsx */
1011 COSTS_N_INSNS (1), /* cost of movzx */
1012 8, /* "large" insn */
1013 9, /* MOVE_RATIO */
1014 4, /* cost for loading QImode using movzbl */
1015 {5, 5, 4}, /* cost of loading integer registers
1016 in QImode, HImode and SImode.
1017 Relative to reg-reg move (2). */
1018 {4, 4, 4}, /* cost of storing integer registers */
1019 2, /* cost of reg,reg fld/fst */
1020 {5, 5, 12}, /* cost of loading fp registers
1021 in SFmode, DFmode and XFmode */
1022 {4, 4, 8}, /* cost of storing fp registers
1023 in SFmode, DFmode and XFmode */
1024 2, /* cost of moving MMX register */
1025 {4, 4}, /* cost of loading MMX registers
1026 in SImode and DImode */
1027 {4, 4}, /* cost of storing MMX registers
1028 in SImode and DImode */
1029 2, /* cost of moving SSE register */
1030 {4, 4, 4}, /* cost of loading SSE registers
1031 in SImode, DImode and TImode */
1032 {4, 4, 4}, /* cost of storing SSE registers
1033 in SImode, DImode and TImode */
1034 2, /* MMX or SSE register to integer */
1035 /* On K8:
1036 MOVD reg64, xmmreg Double FSTORE 4
1037 MOVD reg32, xmmreg Double FSTORE 4
1038 On AMDFAM10:
1039 MOVD reg64, xmmreg Double FADD 3
1040 1/1 1/1
1041 MOVD reg32, xmmreg Double FADD 3
1042 1/1 1/1 */
1043 16, /* size of l1 cache. */
1044 2048, /* size of l2 cache. */
1045 64, /* size of prefetch block */
1046 /* New AMD processors never drop prefetches; if they cannot be performed
1047 immediately, they are queued. We set the number of simultaneous prefetches
1048 to a large constant to reflect this (it is probably not a good idea to
1049 leave the number of prefetches completely unlimited, as their execution
1050 also takes some time). */
1051 100, /* number of parallel prefetches */
1052 2, /* Branch cost */
1053 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1054 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1055 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1056 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1057 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1058 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1060 bdver1_memcpy,
1061 bdver1_memset,
1062 6, /* scalar_stmt_cost. */
1063 4, /* scalar load_cost. */
1064 4, /* scalar_store_cost. */
1065 6, /* vec_stmt_cost. */
1066 0, /* vec_to_scalar_cost. */
1067 2, /* scalar_to_vec_cost. */
1068 4, /* vec_align_load_cost. */
1069 4, /* vec_unalign_load_cost. */
1070 4, /* vec_store_cost. */
1071 4, /* cond_taken_branch_cost. */
1072 2, /* cond_not_taken_branch_cost. */
1073 };
1075 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1076 very small blocks a loop is better. For large blocks, the libcall
1077 can do non-temporal accesses and beat inlining considerably. */
1079 static stringop_algs bdver2_memcpy[2] = {
1080 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1081 {-1, rep_prefix_4_byte, false}}},
1082 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1083 {-1, libcall, false}}}};
1084 static stringop_algs bdver2_memset[2] = {
1085 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1086 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1087 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1088 {-1, libcall, false}}}};
1090 const struct processor_costs bdver2_cost = {
1091 COSTS_N_INSNS (1), /* cost of an add instruction */
1092 COSTS_N_INSNS (1), /* cost of a lea instruction */
1093 COSTS_N_INSNS (1), /* variable shift costs */
1094 COSTS_N_INSNS (1), /* constant shift costs */
1095 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1096 COSTS_N_INSNS (4), /* HI */
1097 COSTS_N_INSNS (4), /* SI */
1098 COSTS_N_INSNS (6), /* DI */
1099 COSTS_N_INSNS (6)}, /* other */
1100 0, /* cost of multiply per each bit set */
1101 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1102 COSTS_N_INSNS (35), /* HI */
1103 COSTS_N_INSNS (51), /* SI */
1104 COSTS_N_INSNS (83), /* DI */
1105 COSTS_N_INSNS (83)}, /* other */
1106 COSTS_N_INSNS (1), /* cost of movsx */
1107 COSTS_N_INSNS (1), /* cost of movzx */
1108 8, /* "large" insn */
1109 9, /* MOVE_RATIO */
1110 4, /* cost for loading QImode using movzbl */
1111 {5, 5, 4}, /* cost of loading integer registers
1112 in QImode, HImode and SImode.
1113 Relative to reg-reg move (2). */
1114 {4, 4, 4}, /* cost of storing integer registers */
1115 2, /* cost of reg,reg fld/fst */
1116 {5, 5, 12}, /* cost of loading fp registers
1117 in SFmode, DFmode and XFmode */
1118 {4, 4, 8}, /* cost of storing fp registers
1119 in SFmode, DFmode and XFmode */
1120 2, /* cost of moving MMX register */
1121 {4, 4}, /* cost of loading MMX registers
1122 in SImode and DImode */
1123 {4, 4}, /* cost of storing MMX registers
1124 in SImode and DImode */
1125 2, /* cost of moving SSE register */
1126 {4, 4, 4}, /* cost of loading SSE registers
1127 in SImode, DImode and TImode */
1128 {4, 4, 4}, /* cost of storing SSE registers
1129 in SImode, DImode and TImode */
1130 2, /* MMX or SSE register to integer */
1131 /* On K8:
1132 MOVD reg64, xmmreg Double FSTORE 4
1133 MOVD reg32, xmmreg Double FSTORE 4
1134 On AMDFAM10:
1135 MOVD reg64, xmmreg Double FADD 3
1136 1/1 1/1
1137 MOVD reg32, xmmreg Double FADD 3
1138 1/1 1/1 */
1139 16, /* size of l1 cache. */
1140 2048, /* size of l2 cache. */
1141 64, /* size of prefetch block */
1142 /* New AMD processors never drop prefetches; if they cannot be performed
1143 immediately, they are queued. We set the number of simultaneous prefetches
1144 to a large constant to reflect this (it is probably not a good idea to
1145 leave the number of prefetches completely unlimited, as their execution
1146 also takes some time). */
1147 100, /* number of parallel prefetches */
1148 2, /* Branch cost */
1149 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1150 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1151 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1152 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1153 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1154 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1156 bdver2_memcpy,
1157 bdver2_memset,
1158 6, /* scalar_stmt_cost. */
1159 4, /* scalar load_cost. */
1160 4, /* scalar_store_cost. */
1161 6, /* vec_stmt_cost. */
1162 0, /* vec_to_scalar_cost. */
1163 2, /* scalar_to_vec_cost. */
1164 4, /* vec_align_load_cost. */
1165 4, /* vec_unalign_load_cost. */
1166 4, /* vec_store_cost. */
1167 4, /* cond_taken_branch_cost. */
1168 2, /* cond_not_taken_branch_cost. */
1169 };
1172 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1173 very small blocks a loop is better. For large blocks, the libcall
1174 can do non-temporal accesses and beat inlining considerably. */
1175 static stringop_algs bdver3_memcpy[2] = {
1176 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1177 {-1, rep_prefix_4_byte, false}}},
1178 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1179 {-1, libcall, false}}}};
1180 static stringop_algs bdver3_memset[2] = {
1181 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1182 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1183 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1184 {-1, libcall, false}}}};
1185 struct processor_costs bdver3_cost = {
1186 COSTS_N_INSNS (1), /* cost of an add instruction */
1187 COSTS_N_INSNS (1), /* cost of a lea instruction */
1188 COSTS_N_INSNS (1), /* variable shift costs */
1189 COSTS_N_INSNS (1), /* constant shift costs */
1190 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1191 COSTS_N_INSNS (4), /* HI */
1192 COSTS_N_INSNS (4), /* SI */
1193 COSTS_N_INSNS (6), /* DI */
1194 COSTS_N_INSNS (6)}, /* other */
1195 0, /* cost of multiply per each bit set */
1196 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1197 COSTS_N_INSNS (35), /* HI */
1198 COSTS_N_INSNS (51), /* SI */
1199 COSTS_N_INSNS (83), /* DI */
1200 COSTS_N_INSNS (83)}, /* other */
1201 COSTS_N_INSNS (1), /* cost of movsx */
1202 COSTS_N_INSNS (1), /* cost of movzx */
1203 8, /* "large" insn */
1204 9, /* MOVE_RATIO */
1205 4, /* cost for loading QImode using movzbl */
1206 {5, 5, 4}, /* cost of loading integer registers
1207 in QImode, HImode and SImode.
1208 Relative to reg-reg move (2). */
1209 {4, 4, 4}, /* cost of storing integer registers */
1210 2, /* cost of reg,reg fld/fst */
1211 {5, 5, 12}, /* cost of loading fp registers
1212 in SFmode, DFmode and XFmode */
1213 {4, 4, 8}, /* cost of storing fp registers
1214 in SFmode, DFmode and XFmode */
1215 2, /* cost of moving MMX register */
1216 {4, 4}, /* cost of loading MMX registers
1217 in SImode and DImode */
1218 {4, 4}, /* cost of storing MMX registers
1219 in SImode and DImode */
1220 2, /* cost of moving SSE register */
1221 {4, 4, 4}, /* cost of loading SSE registers
1222 in SImode, DImode and TImode */
1223 {4, 4, 4}, /* cost of storing SSE registers
1224 in SImode, DImode and TImode */
1225 2, /* MMX or SSE register to integer */
1226 16, /* size of l1 cache. */
1227 2048, /* size of l2 cache. */
1228 64, /* size of prefetch block */
1229 /* New AMD processors never drop prefetches; if they cannot be performed
1230 immediately, they are queued. We set the number of simultaneous prefetches
1231 to a large constant to reflect this (it is probably not a good idea to
1232 leave the number of prefetches completely unlimited, as their execution
1233 also takes some time). */
1234 100, /* number of parallel prefetches */
1235 2, /* Branch cost */
1236 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1243 bdver3_memcpy,
1244 bdver3_memset,
1245 6, /* scalar_stmt_cost. */
1246 4, /* scalar load_cost. */
1247 4, /* scalar_store_cost. */
1248 6, /* vec_stmt_cost. */
1249 0, /* vec_to_scalar_cost. */
1250 2, /* scalar_to_vec_cost. */
1251 4, /* vec_align_load_cost. */
1252 4, /* vec_unalign_load_cost. */
1253 4, /* vec_store_cost. */
1254 4, /* cond_taken_branch_cost. */
1255 2, /* cond_not_taken_branch_cost. */
1256 };
1258 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1259 very small blocks a loop is better. For large blocks, the libcall
1260 can do non-temporal accesses and beat inlining considerably. */
1261 static stringop_algs bdver4_memcpy[2] = {
1262 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1263 {-1, rep_prefix_4_byte, false}}},
1264 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1265 {-1, libcall, false}}}};
1266 static stringop_algs bdver4_memset[2] = {
1267 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1268 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1269 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1270 {-1, libcall, false}}}};
1271 struct processor_costs bdver4_cost = {
1272 COSTS_N_INSNS (1), /* cost of an add instruction */
1273 COSTS_N_INSNS (1), /* cost of a lea instruction */
1274 COSTS_N_INSNS (1), /* variable shift costs */
1275 COSTS_N_INSNS (1), /* constant shift costs */
1276 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1277 COSTS_N_INSNS (4), /* HI */
1278 COSTS_N_INSNS (4), /* SI */
1279 COSTS_N_INSNS (6), /* DI */
1280 COSTS_N_INSNS (6)}, /* other */
1281 0, /* cost of multiply per each bit set */
1282 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1283 COSTS_N_INSNS (35), /* HI */
1284 COSTS_N_INSNS (51), /* SI */
1285 COSTS_N_INSNS (83), /* DI */
1286 COSTS_N_INSNS (83)}, /* other */
1287 COSTS_N_INSNS (1), /* cost of movsx */
1288 COSTS_N_INSNS (1), /* cost of movzx */
1289 8, /* "large" insn */
1290 9, /* MOVE_RATIO */
1291 4, /* cost for loading QImode using movzbl */
1292 {5, 5, 4}, /* cost of loading integer registers
1293 in QImode, HImode and SImode.
1294 Relative to reg-reg move (2). */
1295 {4, 4, 4}, /* cost of storing integer registers */
1296 2, /* cost of reg,reg fld/fst */
1297 {5, 5, 12}, /* cost of loading fp registers
1298 in SFmode, DFmode and XFmode */
1299 {4, 4, 8}, /* cost of storing fp registers
1300 in SFmode, DFmode and XFmode */
1301 2, /* cost of moving MMX register */
1302 {4, 4}, /* cost of loading MMX registers
1303 in SImode and DImode */
1304 {4, 4}, /* cost of storing MMX registers
1305 in SImode and DImode */
1306 2, /* cost of moving SSE register */
1307 {4, 4, 4}, /* cost of loading SSE registers
1308 in SImode, DImode and TImode */
1309 {4, 4, 4}, /* cost of storing SSE registers
1310 in SImode, DImode and TImode */
1311 2, /* MMX or SSE register to integer */
1312 16, /* size of l1 cache. */
1313 2048, /* size of l2 cache. */
1314 64, /* size of prefetch block */
1315 /* New AMD processors never drop prefetches; if they cannot be performed
1316 immediately, they are queued. We set the number of simultaneous prefetches
1317 to a large constant to reflect this (it is probably not a good idea to
1318 leave the number of prefetches completely unlimited, as their execution
1319 also takes some time). */
1320 100, /* number of parallel prefetches */
1321 2, /* Branch cost */
1322 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1323 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1324 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1325 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1326 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1327 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1329 bdver4_memcpy,
1330 bdver4_memset,
1331 6, /* scalar_stmt_cost. */
1332 4, /* scalar load_cost. */
1333 4, /* scalar_store_cost. */
1334 6, /* vec_stmt_cost. */
1335 0, /* vec_to_scalar_cost. */
1336 2, /* scalar_to_vec_cost. */
1337 4, /* vec_align_load_cost. */
1338 4, /* vec_unalign_load_cost. */
1339 4, /* vec_store_cost. */
1340 4, /* cond_taken_branch_cost. */
1341 2, /* cond_not_taken_branch_cost. */
1342 };
1344 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1345 very small blocks a loop is better. For large blocks, the libcall can
1346 do non-temporal accesses and beat inlining considerably. */
1347 static stringop_algs btver1_memcpy[2] = {
1348 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1349 {-1, rep_prefix_4_byte, false}}},
1350 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1351 {-1, libcall, false}}}};
1352 static stringop_algs btver1_memset[2] = {
1353 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1354 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1355 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1356 {-1, libcall, false}}}};
1357 const struct processor_costs btver1_cost = {
1358 COSTS_N_INSNS (1), /* cost of an add instruction */
1359 COSTS_N_INSNS (2), /* cost of a lea instruction */
1360 COSTS_N_INSNS (1), /* variable shift costs */
1361 COSTS_N_INSNS (1), /* constant shift costs */
1362 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1363 COSTS_N_INSNS (4), /* HI */
1364 COSTS_N_INSNS (3), /* SI */
1365 COSTS_N_INSNS (4), /* DI */
1366 COSTS_N_INSNS (5)}, /* other */
1367 0, /* cost of multiply per each bit set */
1368 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1369 COSTS_N_INSNS (35), /* HI */
1370 COSTS_N_INSNS (51), /* SI */
1371 COSTS_N_INSNS (83), /* DI */
1372 COSTS_N_INSNS (83)}, /* other */
1373 COSTS_N_INSNS (1), /* cost of movsx */
1374 COSTS_N_INSNS (1), /* cost of movzx */
1375 8, /* "large" insn */
1376 9, /* MOVE_RATIO */
1377 4, /* cost for loading QImode using movzbl */
1378 {3, 4, 3}, /* cost of loading integer registers
1379 in QImode, HImode and SImode.
1380 Relative to reg-reg move (2). */
1381 {3, 4, 3}, /* cost of storing integer registers */
1382 4, /* cost of reg,reg fld/fst */
1383 {4, 4, 12}, /* cost of loading fp registers
1384 in SFmode, DFmode and XFmode */
1385 {6, 6, 8}, /* cost of storing fp registers
1386 in SFmode, DFmode and XFmode */
1387 2, /* cost of moving MMX register */
1388 {3, 3}, /* cost of loading MMX registers
1389 in SImode and DImode */
1390 {4, 4}, /* cost of storing MMX registers
1391 in SImode and DImode */
1392 2, /* cost of moving SSE register */
1393 {4, 4, 3}, /* cost of loading SSE registers
1394 in SImode, DImode and TImode */
1395 {4, 4, 5}, /* cost of storing SSE registers
1396 in SImode, DImode and TImode */
1397 3, /* MMX or SSE register to integer */
1398 /* On K8:
1399 MOVD reg64, xmmreg Double FSTORE 4
1400 MOVD reg32, xmmreg Double FSTORE 4
1401 On AMDFAM10:
1402 MOVD reg64, xmmreg Double FADD 3
1403 1/1 1/1
1404 MOVD reg32, xmmreg Double FADD 3
1405 1/1 1/1 */
1406 32, /* size of l1 cache. */
1407 512, /* size of l2 cache. */
1408 64, /* size of prefetch block */
1409 100, /* number of parallel prefetches */
1410 2, /* Branch cost */
1411 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1412 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1413 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1414 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1415 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1416 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1418 btver1_memcpy,
1419 btver1_memset,
1420 4, /* scalar_stmt_cost. */
1421 2, /* scalar load_cost. */
1422 2, /* scalar_store_cost. */
1423 6, /* vec_stmt_cost. */
1424 0, /* vec_to_scalar_cost. */
1425 2, /* scalar_to_vec_cost. */
1426 2, /* vec_align_load_cost. */
1427 2, /* vec_unalign_load_cost. */
1428 2, /* vec_store_cost. */
1429 2, /* cond_taken_branch_cost. */
1430 1, /* cond_not_taken_branch_cost. */
1431 };
1433 static stringop_algs btver2_memcpy[2] = {
1434 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1435 {-1, rep_prefix_4_byte, false}}},
1436 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1437 {-1, libcall, false}}}};
1438 static stringop_algs btver2_memset[2] = {
1439 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1440 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1441 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1442 {-1, libcall, false}}}};
1443 const struct processor_costs btver2_cost = {
1444 COSTS_N_INSNS (1), /* cost of an add instruction */
1445 COSTS_N_INSNS (2), /* cost of a lea instruction */
1446 COSTS_N_INSNS (1), /* variable shift costs */
1447 COSTS_N_INSNS (1), /* constant shift costs */
1448 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1449 COSTS_N_INSNS (4), /* HI */
1450 COSTS_N_INSNS (3), /* SI */
1451 COSTS_N_INSNS (4), /* DI */
1452 COSTS_N_INSNS (5)}, /* other */
1453 0, /* cost of multiply per each bit set */
1454 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1455 COSTS_N_INSNS (35), /* HI */
1456 COSTS_N_INSNS (51), /* SI */
1457 COSTS_N_INSNS (83), /* DI */
1458 COSTS_N_INSNS (83)}, /* other */
1459 COSTS_N_INSNS (1), /* cost of movsx */
1460 COSTS_N_INSNS (1), /* cost of movzx */
1461 8, /* "large" insn */
1462 9, /* MOVE_RATIO */
1463 4, /* cost for loading QImode using movzbl */
1464 {3, 4, 3}, /* cost of loading integer registers
1465 in QImode, HImode and SImode.
1466 Relative to reg-reg move (2). */
1467 {3, 4, 3}, /* cost of storing integer registers */
1468 4, /* cost of reg,reg fld/fst */
1469 {4, 4, 12}, /* cost of loading fp registers
1470 in SFmode, DFmode and XFmode */
1471 {6, 6, 8}, /* cost of storing fp registers
1472 in SFmode, DFmode and XFmode */
1473 2, /* cost of moving MMX register */
1474 {3, 3}, /* cost of loading MMX registers
1475 in SImode and DImode */
1476 {4, 4}, /* cost of storing MMX registers
1477 in SImode and DImode */
1478 2, /* cost of moving SSE register */
1479 {4, 4, 3}, /* cost of loading SSE registers
1480 in SImode, DImode and TImode */
1481 {4, 4, 5}, /* cost of storing SSE registers
1482 in SImode, DImode and TImode */
1483 3, /* MMX or SSE register to integer */
1484 /* On K8:
1485 MOVD reg64, xmmreg Double FSTORE 4
1486 MOVD reg32, xmmreg Double FSTORE 4
1487 On AMDFAM10:
1488 MOVD reg64, xmmreg Double FADD 3
1489 1/1 1/1
1490 MOVD reg32, xmmreg Double FADD 3
1491 1/1 1/1 */
1492 32, /* size of l1 cache. */
1493 2048, /* size of l2 cache. */
1494 64, /* size of prefetch block */
1495 100, /* number of parallel prefetches */
1496 2, /* Branch cost */
1497 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1498 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1499 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1500 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1501 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1502 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1503 btver2_memcpy,
1504 btver2_memset,
1505 4, /* scalar_stmt_cost. */
1506 2, /* scalar load_cost. */
1507 2, /* scalar_store_cost. */
1508 6, /* vec_stmt_cost. */
1509 0, /* vec_to_scalar_cost. */
1510 2, /* scalar_to_vec_cost. */
1511 2, /* vec_align_load_cost. */
1512 2, /* vec_unalign_load_cost. */
1513 2, /* vec_store_cost. */
1514 2, /* cond_taken_branch_cost. */
1515 1, /* cond_not_taken_branch_cost. */
1518 static stringop_algs pentium4_memcpy[2] = {
1519 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1520 DUMMY_STRINGOP_ALGS};
1521 static stringop_algs pentium4_memset[2] = {
1522 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1523 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1524 DUMMY_STRINGOP_ALGS};
1526 static const
1527 struct processor_costs pentium4_cost = {
1528 COSTS_N_INSNS (1), /* cost of an add instruction */
1529 COSTS_N_INSNS (3), /* cost of a lea instruction */
1530 COSTS_N_INSNS (4), /* variable shift costs */
1531 COSTS_N_INSNS (4), /* constant shift costs */
1532 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1533 COSTS_N_INSNS (15), /* HI */
1534 COSTS_N_INSNS (15), /* SI */
1535 COSTS_N_INSNS (15), /* DI */
1536 COSTS_N_INSNS (15)}, /* other */
1537 0, /* cost of multiply per each bit set */
1538 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1539 COSTS_N_INSNS (56), /* HI */
1540 COSTS_N_INSNS (56), /* SI */
1541 COSTS_N_INSNS (56), /* DI */
1542 COSTS_N_INSNS (56)}, /* other */
1543 COSTS_N_INSNS (1), /* cost of movsx */
1544 COSTS_N_INSNS (1), /* cost of movzx */
1545 16, /* "large" insn */
1546 6, /* MOVE_RATIO */
1547 2, /* cost for loading QImode using movzbl */
1548 {4, 5, 4}, /* cost of loading integer registers
1549 in QImode, HImode and SImode.
1550 Relative to reg-reg move (2). */
1551 {2, 3, 2}, /* cost of storing integer registers */
1552 2, /* cost of reg,reg fld/fst */
1553 {2, 2, 6}, /* cost of loading fp registers
1554 in SFmode, DFmode and XFmode */
1555 {4, 4, 6}, /* cost of storing fp registers
1556 in SFmode, DFmode and XFmode */
1557 2, /* cost of moving MMX register */
1558 {2, 2}, /* cost of loading MMX registers
1559 in SImode and DImode */
1560 {2, 2}, /* cost of storing MMX registers
1561 in SImode and DImode */
1562 12, /* cost of moving SSE register */
1563 {12, 12, 12}, /* cost of loading SSE registers
1564 in SImode, DImode and TImode */
1565 {2, 2, 8}, /* cost of storing SSE registers
1566 in SImode, DImode and TImode */
1567 10, /* MMX or SSE register to integer */
1568 8, /* size of l1 cache. */
1569 256, /* size of l2 cache. */
1570 64, /* size of prefetch block */
1571 6, /* number of parallel prefetches */
1572 2, /* Branch cost */
1573 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1574 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1575 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1576 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1577 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1578 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1579 pentium4_memcpy,
1580 pentium4_memset,
1581 1, /* scalar_stmt_cost. */
1582 1, /* scalar load_cost. */
1583 1, /* scalar_store_cost. */
1584 1, /* vec_stmt_cost. */
1585 1, /* vec_to_scalar_cost. */
1586 1, /* scalar_to_vec_cost. */
1587 1, /* vec_align_load_cost. */
1588 2, /* vec_unalign_load_cost. */
1589 1, /* vec_store_cost. */
1590 3, /* cond_taken_branch_cost. */
1591 1, /* cond_not_taken_branch_cost. */
1594 static stringop_algs nocona_memcpy[2] = {
1595 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1596 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1597 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1599 static stringop_algs nocona_memset[2] = {
1600 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1601 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1602 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1603 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1605 static const
1606 struct processor_costs nocona_cost = {
1607 COSTS_N_INSNS (1), /* cost of an add instruction */
1608 COSTS_N_INSNS (1), /* cost of a lea instruction */
1609 COSTS_N_INSNS (1), /* variable shift costs */
1610 COSTS_N_INSNS (1), /* constant shift costs */
1611 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1612 COSTS_N_INSNS (10), /* HI */
1613 COSTS_N_INSNS (10), /* SI */
1614 COSTS_N_INSNS (10), /* DI */
1615 COSTS_N_INSNS (10)}, /* other */
1616 0, /* cost of multiply per each bit set */
1617 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1618 COSTS_N_INSNS (66), /* HI */
1619 COSTS_N_INSNS (66), /* SI */
1620 COSTS_N_INSNS (66), /* DI */
1621 COSTS_N_INSNS (66)}, /* other */
1622 COSTS_N_INSNS (1), /* cost of movsx */
1623 COSTS_N_INSNS (1), /* cost of movzx */
1624 16, /* "large" insn */
1625 17, /* MOVE_RATIO */
1626 4, /* cost for loading QImode using movzbl */
1627 {4, 4, 4}, /* cost of loading integer registers
1628 in QImode, HImode and SImode.
1629 Relative to reg-reg move (2). */
1630 {4, 4, 4}, /* cost of storing integer registers */
1631 3, /* cost of reg,reg fld/fst */
1632 {12, 12, 12}, /* cost of loading fp registers
1633 in SFmode, DFmode and XFmode */
1634 {4, 4, 4}, /* cost of storing fp registers
1635 in SFmode, DFmode and XFmode */
1636 6, /* cost of moving MMX register */
1637 {12, 12}, /* cost of loading MMX registers
1638 in SImode and DImode */
1639 {12, 12}, /* cost of storing MMX registers
1640 in SImode and DImode */
1641 6, /* cost of moving SSE register */
1642 {12, 12, 12}, /* cost of loading SSE registers
1643 in SImode, DImode and TImode */
1644 {12, 12, 12}, /* cost of storing SSE registers
1645 in SImode, DImode and TImode */
1646 8, /* MMX or SSE register to integer */
1647 8, /* size of l1 cache. */
1648 1024, /* size of l2 cache. */
1649 64, /* size of prefetch block */
1650 8, /* number of parallel prefetches */
1651 1, /* Branch cost */
1652 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1653 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1654 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1655 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1656 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1657 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1658 nocona_memcpy,
1659 nocona_memset,
1660 1, /* scalar_stmt_cost. */
1661 1, /* scalar load_cost. */
1662 1, /* scalar_store_cost. */
1663 1, /* vec_stmt_cost. */
1664 1, /* vec_to_scalar_cost. */
1665 1, /* scalar_to_vec_cost. */
1666 1, /* vec_align_load_cost. */
1667 2, /* vec_unalign_load_cost. */
1668 1, /* vec_store_cost. */
1669 3, /* cond_taken_branch_cost. */
1670 1, /* cond_not_taken_branch_cost. */
1673 static stringop_algs atom_memcpy[2] = {
1674 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1675 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1676 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1677 static stringop_algs atom_memset[2] = {
1678 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1679 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1680 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1681 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1682 static const
1683 struct processor_costs atom_cost = {
1684 COSTS_N_INSNS (1), /* cost of an add instruction */
1685 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1686 COSTS_N_INSNS (1), /* variable shift costs */
1687 COSTS_N_INSNS (1), /* constant shift costs */
1688 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1689 COSTS_N_INSNS (4), /* HI */
1690 COSTS_N_INSNS (3), /* SI */
1691 COSTS_N_INSNS (4), /* DI */
1692 COSTS_N_INSNS (2)}, /* other */
1693 0, /* cost of multiply per each bit set */
1694 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1695 COSTS_N_INSNS (26), /* HI */
1696 COSTS_N_INSNS (42), /* SI */
1697 COSTS_N_INSNS (74), /* DI */
1698 COSTS_N_INSNS (74)}, /* other */
1699 COSTS_N_INSNS (1), /* cost of movsx */
1700 COSTS_N_INSNS (1), /* cost of movzx */
1701 8, /* "large" insn */
1702 17, /* MOVE_RATIO */
1703 4, /* cost for loading QImode using movzbl */
1704 {4, 4, 4}, /* cost of loading integer registers
1705 in QImode, HImode and SImode.
1706 Relative to reg-reg move (2). */
1707 {4, 4, 4}, /* cost of storing integer registers */
1708 4, /* cost of reg,reg fld/fst */
1709 {12, 12, 12}, /* cost of loading fp registers
1710 in SFmode, DFmode and XFmode */
1711 {6, 6, 8}, /* cost of storing fp registers
1712 in SFmode, DFmode and XFmode */
1713 2, /* cost of moving MMX register */
1714 {8, 8}, /* cost of loading MMX registers
1715 in SImode and DImode */
1716 {8, 8}, /* cost of storing MMX registers
1717 in SImode and DImode */
1718 2, /* cost of moving SSE register */
1719 {8, 8, 8}, /* cost of loading SSE registers
1720 in SImode, DImode and TImode */
1721 {8, 8, 8}, /* cost of storing SSE registers
1722 in SImode, DImode and TImode */
1723 5, /* MMX or SSE register to integer */
1724 32, /* size of l1 cache. */
1725 256, /* size of l2 cache. */
1726 64, /* size of prefetch block */
1727 6, /* number of parallel prefetches */
1728 3, /* Branch cost */
1729 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1730 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1731 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1732 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1733 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1734 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1735 atom_memcpy,
1736 atom_memset,
1737 1, /* scalar_stmt_cost. */
1738 1, /* scalar load_cost. */
1739 1, /* scalar_store_cost. */
1740 1, /* vec_stmt_cost. */
1741 1, /* vec_to_scalar_cost. */
1742 1, /* scalar_to_vec_cost. */
1743 1, /* vec_align_load_cost. */
1744 2, /* vec_unalign_load_cost. */
1745 1, /* vec_store_cost. */
1746 3, /* cond_taken_branch_cost. */
1747 1, /* cond_not_taken_branch_cost. */
1750 static stringop_algs slm_memcpy[2] = {
1751 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1752 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1753 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1754 static stringop_algs slm_memset[2] = {
1755 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1756 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1757 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1758 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1759 static const
1760 struct processor_costs slm_cost = {
1761 COSTS_N_INSNS (1), /* cost of an add instruction */
1762 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1763 COSTS_N_INSNS (1), /* variable shift costs */
1764 COSTS_N_INSNS (1), /* constant shift costs */
1765 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1766 COSTS_N_INSNS (3), /* HI */
1767 COSTS_N_INSNS (3), /* SI */
1768 COSTS_N_INSNS (4), /* DI */
1769 COSTS_N_INSNS (2)}, /* other */
1770 0, /* cost of multiply per each bit set */
1771 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1772 COSTS_N_INSNS (26), /* HI */
1773 COSTS_N_INSNS (42), /* SI */
1774 COSTS_N_INSNS (74), /* DI */
1775 COSTS_N_INSNS (74)}, /* other */
1776 COSTS_N_INSNS (1), /* cost of movsx */
1777 COSTS_N_INSNS (1), /* cost of movzx */
1778 8, /* "large" insn */
1779 17, /* MOVE_RATIO */
1780 4, /* cost for loading QImode using movzbl */
1781 {4, 4, 4}, /* cost of loading integer registers
1782 in QImode, HImode and SImode.
1783 Relative to reg-reg move (2). */
1784 {4, 4, 4}, /* cost of storing integer registers */
1785 4, /* cost of reg,reg fld/fst */
1786 {12, 12, 12}, /* cost of loading fp registers
1787 in SFmode, DFmode and XFmode */
1788 {6, 6, 8}, /* cost of storing fp registers
1789 in SFmode, DFmode and XFmode */
1790 2, /* cost of moving MMX register */
1791 {8, 8}, /* cost of loading MMX registers
1792 in SImode and DImode */
1793 {8, 8}, /* cost of storing MMX registers
1794 in SImode and DImode */
1795 2, /* cost of moving SSE register */
1796 {8, 8, 8}, /* cost of loading SSE registers
1797 in SImode, DImode and TImode */
1798 {8, 8, 8}, /* cost of storing SSE registers
1799 in SImode, DImode and TImode */
1800 5, /* MMX or SSE register to integer */
1801 32, /* size of l1 cache. */
1802 256, /* size of l2 cache. */
1803 64, /* size of prefetch block */
1804 6, /* number of parallel prefetches */
1805 3, /* Branch cost */
1806 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1807 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1808 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1809 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1810 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1811 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1812 slm_memcpy,
1813 slm_memset,
1814 1, /* scalar_stmt_cost. */
1815 1, /* scalar load_cost. */
1816 1, /* scalar_store_cost. */
1817 1, /* vec_stmt_cost. */
1818 4, /* vec_to_scalar_cost. */
1819 1, /* scalar_to_vec_cost. */
1820 1, /* vec_align_load_cost. */
1821 2, /* vec_unalign_load_cost. */
1822 1, /* vec_store_cost. */
1823 3, /* cond_taken_branch_cost. */
1824 1, /* cond_not_taken_branch_cost. */
1827 static stringop_algs intel_memcpy[2] = {
1828 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1829 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1830 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1831 static stringop_algs intel_memset[2] = {
1832 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1833 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1834 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1835 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1836 static const
1837 struct processor_costs intel_cost = {
1838 COSTS_N_INSNS (1), /* cost of an add instruction */
1839 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1840 COSTS_N_INSNS (1), /* variable shift costs */
1841 COSTS_N_INSNS (1), /* constant shift costs */
1842 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1843 COSTS_N_INSNS (3), /* HI */
1844 COSTS_N_INSNS (3), /* SI */
1845 COSTS_N_INSNS (4), /* DI */
1846 COSTS_N_INSNS (2)}, /* other */
1847 0, /* cost of multiply per each bit set */
1848 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1849 COSTS_N_INSNS (26), /* HI */
1850 COSTS_N_INSNS (42), /* SI */
1851 COSTS_N_INSNS (74), /* DI */
1852 COSTS_N_INSNS (74)}, /* other */
1853 COSTS_N_INSNS (1), /* cost of movsx */
1854 COSTS_N_INSNS (1), /* cost of movzx */
1855 8, /* "large" insn */
1856 17, /* MOVE_RATIO */
1857 4, /* cost for loading QImode using movzbl */
1858 {4, 4, 4}, /* cost of loading integer registers
1859 in QImode, HImode and SImode.
1860 Relative to reg-reg move (2). */
1861 {4, 4, 4}, /* cost of storing integer registers */
1862 4, /* cost of reg,reg fld/fst */
1863 {12, 12, 12}, /* cost of loading fp registers
1864 in SFmode, DFmode and XFmode */
1865 {6, 6, 8}, /* cost of storing fp registers
1866 in SFmode, DFmode and XFmode */
1867 2, /* cost of moving MMX register */
1868 {8, 8}, /* cost of loading MMX registers
1869 in SImode and DImode */
1870 {8, 8}, /* cost of storing MMX registers
1871 in SImode and DImode */
1872 2, /* cost of moving SSE register */
1873 {8, 8, 8}, /* cost of loading SSE registers
1874 in SImode, DImode and TImode */
1875 {8, 8, 8}, /* cost of storing SSE registers
1876 in SImode, DImode and TImode */
1877 5, /* MMX or SSE register to integer */
1878 32, /* size of l1 cache. */
1879 256, /* size of l2 cache. */
1880 64, /* size of prefetch block */
1881 6, /* number of parallel prefetches */
1882 3, /* Branch cost */
1883 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1884 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1885 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1886 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1887 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1888 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1889 intel_memcpy,
1890 intel_memset,
1891 1, /* scalar_stmt_cost. */
1892 1, /* scalar load_cost. */
1893 1, /* scalar_store_cost. */
1894 1, /* vec_stmt_cost. */
1895 4, /* vec_to_scalar_cost. */
1896 1, /* scalar_to_vec_cost. */
1897 1, /* vec_align_load_cost. */
1898 2, /* vec_unalign_load_cost. */
1899 1, /* vec_store_cost. */
1900 3, /* cond_taken_branch_cost. */
1901 1, /* cond_not_taken_branch_cost. */
1904 /* Generic should produce code tuned for Core-i7 (and newer chips)
1905 and btver1 (and newer chips). */
1907 static stringop_algs generic_memcpy[2] = {
1908 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1909 {-1, libcall, false}}},
1910 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1911 {-1, libcall, false}}}};
1912 static stringop_algs generic_memset[2] = {
1913 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1914 {-1, libcall, false}}},
1915 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1916 {-1, libcall, false}}}};
1917 static const
1918 struct processor_costs generic_cost = {
1919 COSTS_N_INSNS (1), /* cost of an add instruction */
1920 /* On all chips taken into consideration lea is 2 cycles or more. With
1921 this cost, however, our current implementation of synth_mult results in
1922 the use of unnecessary temporary registers, causing regressions on several
1923 SPECfp benchmarks. */
1924 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1925 COSTS_N_INSNS (1), /* variable shift costs */
1926 COSTS_N_INSNS (1), /* constant shift costs */
1927 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1928 COSTS_N_INSNS (4), /* HI */
1929 COSTS_N_INSNS (3), /* SI */
1930 COSTS_N_INSNS (4), /* DI */
1931 COSTS_N_INSNS (2)}, /* other */
1932 0, /* cost of multiply per each bit set */
1933 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1934 COSTS_N_INSNS (26), /* HI */
1935 COSTS_N_INSNS (42), /* SI */
1936 COSTS_N_INSNS (74), /* DI */
1937 COSTS_N_INSNS (74)}, /* other */
1938 COSTS_N_INSNS (1), /* cost of movsx */
1939 COSTS_N_INSNS (1), /* cost of movzx */
1940 8, /* "large" insn */
1941 17, /* MOVE_RATIO */
1942 4, /* cost for loading QImode using movzbl */
1943 {4, 4, 4}, /* cost of loading integer registers
1944 in QImode, HImode and SImode.
1945 Relative to reg-reg move (2). */
1946 {4, 4, 4}, /* cost of storing integer registers */
1947 4, /* cost of reg,reg fld/fst */
1948 {12, 12, 12}, /* cost of loading fp registers
1949 in SFmode, DFmode and XFmode */
1950 {6, 6, 8}, /* cost of storing fp registers
1951 in SFmode, DFmode and XFmode */
1952 2, /* cost of moving MMX register */
1953 {8, 8}, /* cost of loading MMX registers
1954 in SImode and DImode */
1955 {8, 8}, /* cost of storing MMX registers
1956 in SImode and DImode */
1957 2, /* cost of moving SSE register */
1958 {8, 8, 8}, /* cost of loading SSE registers
1959 in SImode, DImode and TImode */
1960 {8, 8, 8}, /* cost of storing SSE registers
1961 in SImode, DImode and TImode */
1962 5, /* MMX or SSE register to integer */
1963 32, /* size of l1 cache. */
1964 512, /* size of l2 cache. */
1965 64, /* size of prefetch block */
1966 6, /* number of parallel prefetches */
1967 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1968 value is increased to the perhaps more appropriate value of 5. */
1969 3, /* Branch cost */
1970 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1971 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1972 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1973 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1974 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1975 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1976 generic_memcpy,
1977 generic_memset,
1978 1, /* scalar_stmt_cost. */
1979 1, /* scalar load_cost. */
1980 1, /* scalar_store_cost. */
1981 1, /* vec_stmt_cost. */
1982 1, /* vec_to_scalar_cost. */
1983 1, /* scalar_to_vec_cost. */
1984 1, /* vec_align_load_cost. */
1985 2, /* vec_unalign_load_cost. */
1986 1, /* vec_store_cost. */
1987 3, /* cond_taken_branch_cost. */
1988 1, /* cond_not_taken_branch_cost. */
1991 /* core_cost should produce code tuned for the Core family of CPUs. */
1992 static stringop_algs core_memcpy[2] = {
1993 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1994 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1995 {-1, libcall, false}}}};
1996 static stringop_algs core_memset[2] = {
1997 {libcall, {{6, loop_1_byte, true},
1998 {24, loop, true},
1999 {8192, rep_prefix_4_byte, true},
2000 {-1, libcall, false}}},
2001 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2002 {-1, libcall, false}}}};
2004 static const
2005 struct processor_costs core_cost = {
2006 COSTS_N_INSNS (1), /* cost of an add instruction */
2007 /* On all chips taken into consideration lea is 2 cycles or more. With
2008 this cost, however, our current implementation of synth_mult results in
2009 the use of unnecessary temporary registers, causing regressions on several
2010 SPECfp benchmarks. */
2011 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2012 COSTS_N_INSNS (1), /* variable shift costs */
2013 COSTS_N_INSNS (1), /* constant shift costs */
2014 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2015 COSTS_N_INSNS (4), /* HI */
2016 COSTS_N_INSNS (3), /* SI */
2017 COSTS_N_INSNS (4), /* DI */
2018 COSTS_N_INSNS (2)}, /* other */
2019 0, /* cost of multiply per each bit set */
2020 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2021 COSTS_N_INSNS (26), /* HI */
2022 COSTS_N_INSNS (42), /* SI */
2023 COSTS_N_INSNS (74), /* DI */
2024 COSTS_N_INSNS (74)}, /* other */
2025 COSTS_N_INSNS (1), /* cost of movsx */
2026 COSTS_N_INSNS (1), /* cost of movzx */
2027 8, /* "large" insn */
2028 17, /* MOVE_RATIO */
2029 4, /* cost for loading QImode using movzbl */
2030 {4, 4, 4}, /* cost of loading integer registers
2031 in QImode, HImode and SImode.
2032 Relative to reg-reg move (2). */
2033 {4, 4, 4}, /* cost of storing integer registers */
2034 4, /* cost of reg,reg fld/fst */
2035 {12, 12, 12}, /* cost of loading fp registers
2036 in SFmode, DFmode and XFmode */
2037 {6, 6, 8}, /* cost of storing fp registers
2038 in SFmode, DFmode and XFmode */
2039 2, /* cost of moving MMX register */
2040 {8, 8}, /* cost of loading MMX registers
2041 in SImode and DImode */
2042 {8, 8}, /* cost of storing MMX registers
2043 in SImode and DImode */
2044 2, /* cost of moving SSE register */
2045 {8, 8, 8}, /* cost of loading SSE registers
2046 in SImode, DImode and TImode */
2047 {8, 8, 8}, /* cost of storing SSE registers
2048 in SImode, DImode and TImode */
2049 5, /* MMX or SSE register to integer */
2050 64, /* size of l1 cache. */
2051 512, /* size of l2 cache. */
2052 64, /* size of prefetch block */
2053 6, /* number of parallel prefetches */
2054 /* FIXME perhaps more appropriate value is 5. */
2055 3, /* Branch cost */
2056 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2057 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2058 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2059 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2060 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2061 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2062 core_memcpy,
2063 core_memset,
2064 1, /* scalar_stmt_cost. */
2065 1, /* scalar load_cost. */
2066 1, /* scalar_store_cost. */
2067 1, /* vec_stmt_cost. */
2068 1, /* vec_to_scalar_cost. */
2069 1, /* scalar_to_vec_cost. */
2070 1, /* vec_align_load_cost. */
2071 2, /* vec_unalign_load_cost. */
2072 1, /* vec_store_cost. */
2073 3, /* cond_taken_branch_cost. */
2074 1, /* cond_not_taken_branch_cost. */
2078 /* Set by -mtune. */
2079 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2081 /* Set by -mtune or -Os. */
2082 const struct processor_costs *ix86_cost = &pentium_cost;
2084 /* Processor feature/optimization bitmasks. */
2085 #define m_386 (1<<PROCESSOR_I386)
2086 #define m_486 (1<<PROCESSOR_I486)
2087 #define m_PENT (1<<PROCESSOR_PENTIUM)
2088 #define m_IAMCU (1<<PROCESSOR_IAMCU)
2089 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2090 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2091 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2092 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2093 #define m_CORE2 (1<<PROCESSOR_CORE2)
2094 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2095 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2096 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2097 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2098 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2099 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2100 #define m_KNL (1<<PROCESSOR_KNL)
2101 #define m_INTEL (1<<PROCESSOR_INTEL)
2103 #define m_GEODE (1<<PROCESSOR_GEODE)
2104 #define m_K6 (1<<PROCESSOR_K6)
2105 #define m_K6_GEODE (m_K6 | m_GEODE)
2106 #define m_K8 (1<<PROCESSOR_K8)
2107 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2108 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2109 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2110 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2111 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2112 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2113 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2114 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2115 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2116 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2117 #define m_BTVER (m_BTVER1 | m_BTVER2)
2118 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2120 #define m_GENERIC (1<<PROCESSOR_GENERIC)
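/* A sketch of how these masks are consumed: every DEF_TUNE line in
   x86-tune.def selects the processors a tuning flag applies to by OR-ing
   the m_* masks together, along the lines of (illustrative entry only,
   the authoritative list lives in x86-tune.def):

     DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",
               m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC)
*/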
2122 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2123 #undef DEF_TUNE
2124 #define DEF_TUNE(tune, name, selector) name,
2125 #include "x86-tune.def"
2126 #undef DEF_TUNE
2129 /* Feature tests against the various tunings. */
2130 unsigned char ix86_tune_features[X86_TUNE_LAST];
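/* ix86_tune_feature_names above and initial_ix86_tune_features below are
   kept in sync by expanding the same x86-tune.def twice with different
   DEF_TUNE bodies (the classic "X macro" idiom): conceptually, each
   DEF_TUNE (tune, name, selector) line contributes

     name,       to ix86_tune_feature_names[]
     selector,   to initial_ix86_tune_features[]

   so a given tuning flag has the same index in both arrays.  */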
2132 /* Feature tests against the various tunings used to create ix86_tune_features
2133 based on the processor mask. */
2134 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2135 #undef DEF_TUNE
2136 #define DEF_TUNE(tune, name, selector) selector,
2137 #include "x86-tune.def"
2138 #undef DEF_TUNE
2141 /* Feature tests against the various architecture variations. */
2142 unsigned char ix86_arch_features[X86_ARCH_LAST];
2144 /* Feature tests against the various architecture variations, used to create
2145 ix86_arch_features based on the processor mask. */
2146 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2147 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2148 ~(m_386 | m_486 | m_PENT | m_IAMCU | m_K6),
2150 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2151 ~m_386,
2153 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2154 ~(m_386 | m_486),
2156 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2157 ~m_386,
2159 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2160 ~m_386,
2163 /* In case the average insn count for a single function invocation is
2164 lower than this constant, emit fast (but longer) prologue and
2165 epilogue code. */
2166 #define FAST_PROLOGUE_INSN_COUNT 20
2168 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2169 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2170 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2171 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2173 /* Array of the smallest class containing reg number REGNO, indexed by
2174 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2176 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2178 /* ax, dx, cx, bx */
2179 AREG, DREG, CREG, BREG,
2180 /* si, di, bp, sp */
2181 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2182 /* FP registers */
2183 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2184 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2185 /* arg pointer */
2186 NON_Q_REGS,
2187 /* flags, fpsr, fpcr, frame */
2188 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2189 /* SSE registers */
2190 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2191 SSE_REGS, SSE_REGS,
2192 /* MMX registers */
2193 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2194 MMX_REGS, MMX_REGS,
2195 /* REX registers */
2196 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2197 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2198 /* SSE REX registers */
2199 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2200 SSE_REGS, SSE_REGS,
2201 /* AVX-512 SSE registers */
2202 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2203 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2204 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2205 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2206 /* Mask registers. */
2207 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2208 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2209 /* MPX bound registers */
2210 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2213 /* The "default" register map used in 32bit mode. */
2215 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2217 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2218 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2219 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2220 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2221 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2222 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2223 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2224 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2225 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2226 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2227 101, 102, 103, 104, /* bound registers */
2230 /* The "default" register map used in 64bit mode. */
2232 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2234 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2235 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2236 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2237 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2238 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2239 8,9,10,11,12,13,14,15, /* extended integer registers */
2240 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2241 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2242 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2243 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2244 126, 127, 128, 129, /* bound registers */
2247 /* Define the register numbers to be used in Dwarf debugging information.
2248 The SVR4 reference port C compiler uses the following register numbers
2249 in its Dwarf output code:
2250 0 for %eax (gcc regno = 0)
2251 1 for %ecx (gcc regno = 2)
2252 2 for %edx (gcc regno = 1)
2253 3 for %ebx (gcc regno = 3)
2254 4 for %esp (gcc regno = 7)
2255 5 for %ebp (gcc regno = 6)
2256 6 for %esi (gcc regno = 4)
2257 7 for %edi (gcc regno = 5)
2258 The following three DWARF register numbers are never generated by
2259 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2260 believes these numbers have these meanings.
2261 8 for %eip (no gcc equivalent)
2262 9 for %eflags (gcc regno = 17)
2263 10 for %trapno (no gcc equivalent)
2264 It is not at all clear how we should number the FP stack registers
2265 for the x86 architecture. If the version of SDB on x86/svr4 were
2266 a bit less brain dead with respect to floating-point then we would
2267 have a precedent to follow with respect to DWARF register numbers
2268 for x86 FP registers, but the SDB on x86/svr4 is so completely
2269 broken with respect to FP registers that it is hardly worth thinking
2270 of it as something to strive for compatibility with.
2271 The version of x86/svr4 SDB I have at the moment does (partially)
2272 seem to believe that DWARF register number 11 is associated with
2273 the x86 register %st(0), but that's about all. Higher DWARF
2274 register numbers don't seem to be associated with anything in
2275 particular, and even for DWARF regno 11, SDB only seems to under-
2276 stand that it should say that a variable lives in %st(0) (when
2277 asked via an `=' command) if we said it was in DWARF regno 11,
2278 but SDB still prints garbage when asked for the value of the
2279 variable in question (via a `/' command).
2280 (Also note that the labels SDB prints for various FP stack regs
2281 when doing an `x' command are all wrong.)
2282 Note that these problems generally don't affect the native SVR4
2283 C compiler because it doesn't allow the use of -O with -g and
2284 because when it is *not* optimizing, it allocates a memory
2285 location for each floating-point variable, and the memory
2286 location is what gets described in the DWARF AT_location
2287 attribute for the variable in question.
2288 Regardless of the severe mental illness of the x86/svr4 SDB, we
2289 do something sensible here and we use the following DWARF
2290 register numbers. Note that these are all stack-top-relative
2291 numbers.
2292 11 for %st(0) (gcc regno = 8)
2293 12 for %st(1) (gcc regno = 9)
2294 13 for %st(2) (gcc regno = 10)
2295 14 for %st(3) (gcc regno = 11)
2296 15 for %st(4) (gcc regno = 12)
2297 16 for %st(5) (gcc regno = 13)
2298 17 for %st(6) (gcc regno = 14)
2299 18 for %st(7) (gcc regno = 15)
2301 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2303 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2304 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2305 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2306 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2307 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2308 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2309 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2310 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2311 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2312 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2313 101, 102, 103, 104, /* bound registers */
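/* Worked example of the mapping above: GCC register number 1 is %edx and
   svr4_dbx_register_map[1] == 2, matching "2 for %edx (gcc regno = 1)" in
   the comment; likewise %esp (gcc regno 7) maps to DWARF register 4 and
   %ebp (gcc regno 6) to 5.  The debug output machinery indexes these
   arrays by GCC register number to obtain the number it writes into
   DWARF/stabs information.  */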
2316 /* Define parameter passing and return registers. */
2318 static int const x86_64_int_parameter_registers[6] =
2320 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2323 static int const x86_64_ms_abi_int_parameter_registers[4] =
2325 CX_REG, DX_REG, R8_REG, R9_REG
2328 static int const x86_64_int_return_registers[4] =
2330 AX_REG, DX_REG, DI_REG, SI_REG
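/* Illustrative use of the tables above (SysV 64-bit ABI): for a call such
   as

     long f (long a, long b, void *c);

   the three integer arguments go in the first three entries of
   x86_64_int_parameter_registers, i.e. %rdi, %rsi and %rdx, and the result
   comes back in %rax, the first entry of x86_64_int_return_registers.
   Under the MS ABI the same arguments would instead use %rcx, %rdx and
   %r8.  */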
2333 /* Additional registers that are clobbered by SYSV calls. */
2335 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2337 SI_REG, DI_REG,
2338 XMM6_REG, XMM7_REG,
2339 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2340 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2343 /* Define the structure for the machine field in struct function. */
2345 struct GTY(()) stack_local_entry {
2346 unsigned short mode;
2347 unsigned short n;
2348 rtx rtl;
2349 struct stack_local_entry *next;
2352 /* Structure describing stack frame layout.
2353 Stack grows downward:
2355 [arguments]
2356 <- ARG_POINTER
2357 saved pc
2359 saved static chain if ix86_static_chain_on_stack
2361 saved frame pointer if frame_pointer_needed
2362 <- HARD_FRAME_POINTER
2363 [saved regs]
2364 <- regs_save_offset
2365 [padding0]
2367 [saved SSE regs]
2368 <- sse_regs_save_offset
2369 [padding1] |
2370 | <- FRAME_POINTER
2371 [va_arg registers] |
2373 [frame] |
2375 [padding2] | = to_allocate
2376 <- STACK_POINTER
2378 struct ix86_frame
2380 int nsseregs;
2381 int nregs;
2382 int va_arg_size;
2383 int red_zone_size;
2384 int outgoing_arguments_size;
2386 /* The offsets relative to ARG_POINTER. */
2387 HOST_WIDE_INT frame_pointer_offset;
2388 HOST_WIDE_INT hard_frame_pointer_offset;
2389 HOST_WIDE_INT stack_pointer_offset;
2390 HOST_WIDE_INT hfp_save_offset;
2391 HOST_WIDE_INT reg_save_offset;
2392 HOST_WIDE_INT sse_reg_save_offset;
2394 /* When save_regs_using_mov is set, emit prologue using
2395 move instead of push instructions. */
2396 bool save_regs_using_mov;
2399 /* Which cpu are we scheduling for. */
2400 enum attr_cpu ix86_schedule;
2402 /* Which cpu are we optimizing for. */
2403 enum processor_type ix86_tune;
2405 /* Which instruction set architecture to use. */
2406 enum processor_type ix86_arch;
2408 /* True if processor has SSE prefetch instruction. */
2409 unsigned char x86_prefetch_sse;
2411 /* -mstackrealign option */
2412 static const char ix86_force_align_arg_pointer_string[]
2413 = "force_align_arg_pointer";
2415 static rtx (*ix86_gen_leave) (void);
2416 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2417 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2418 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2419 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2420 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2421 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2422 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2423 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2424 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2425 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2426 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2427 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2429 /* Preferred alignment for stack boundary in bits. */
2430 unsigned int ix86_preferred_stack_boundary;
2432 /* Alignment for incoming stack boundary in bits specified at
2433 command line. */
2434 static unsigned int ix86_user_incoming_stack_boundary;
2436 /* Default alignment for incoming stack boundary in bits. */
2437 static unsigned int ix86_default_incoming_stack_boundary;
2439 /* Alignment for incoming stack boundary in bits. */
2440 unsigned int ix86_incoming_stack_boundary;
2442 /* Calling abi specific va_list type nodes. */
2443 static GTY(()) tree sysv_va_list_type_node;
2444 static GTY(()) tree ms_va_list_type_node;
2446 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2447 char internal_label_prefix[16];
2448 int internal_label_prefix_len;
2450 /* Fence to use after loop using movnt. */
2451 tree x86_mfence;
2453 /* Register class used for passing a given 64-bit part of an argument.
2454 These represent classes as documented by the psABI, with the exception
2455 of the SSESF and SSEDF classes, which are basically the SSE class; GCC just
2456 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2458 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2459 whenever possible (the upper half then contains only padding). */
2460 enum x86_64_reg_class
2462 X86_64_NO_CLASS,
2463 X86_64_INTEGER_CLASS,
2464 X86_64_INTEGERSI_CLASS,
2465 X86_64_SSE_CLASS,
2466 X86_64_SSESF_CLASS,
2467 X86_64_SSEDF_CLASS,
2468 X86_64_SSEUP_CLASS,
2469 X86_64_X87_CLASS,
2470 X86_64_X87UP_CLASS,
2471 X86_64_COMPLEX_X87_CLASS,
2472 X86_64_MEMORY_CLASS
2475 #define MAX_CLASSES 8
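/* A small classification example (a sketch of the psABI rules this enum
   models, not the classifier itself): an argument of type

     struct { double x; double y; }

   occupies two eightbytes, each classified SSEDF, so it is passed in two
   SSE registers, whereas

     struct { long a; int b; }

   classifies as INTEGER + INTEGERSI and is passed in two general-purpose
   registers, with the second part moved in SImode because its upper half
   is only padding.  */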
2477 /* Table of constants used by fldpi, fldln2, etc.... */
2478 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2479 static bool ext_80387_constants_init = 0;
2482 static struct machine_function * ix86_init_machine_status (void);
2483 static rtx ix86_function_value (const_tree, const_tree, bool);
2484 static bool ix86_function_value_regno_p (const unsigned int);
2485 static unsigned int ix86_function_arg_boundary (machine_mode,
2486 const_tree);
2487 static rtx ix86_static_chain (const_tree, bool);
2488 static int ix86_function_regparm (const_tree, const_tree);
2489 static void ix86_compute_frame_layout (struct ix86_frame *);
2490 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2491 rtx, rtx, int);
2492 static void ix86_add_new_builtins (HOST_WIDE_INT);
2493 static tree ix86_canonical_va_list_type (tree);
2494 static void predict_jump (int);
2495 static unsigned int split_stack_prologue_scratch_regno (void);
2496 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2498 enum ix86_function_specific_strings
2500 IX86_FUNCTION_SPECIFIC_ARCH,
2501 IX86_FUNCTION_SPECIFIC_TUNE,
2502 IX86_FUNCTION_SPECIFIC_MAX
2505 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2506 const char *, enum fpmath_unit, bool);
2507 static void ix86_function_specific_save (struct cl_target_option *,
2508 struct gcc_options *opts);
2509 static void ix86_function_specific_restore (struct gcc_options *opts,
2510 struct cl_target_option *);
2511 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2512 static void ix86_function_specific_print (FILE *, int,
2513 struct cl_target_option *);
2514 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2515 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2516 struct gcc_options *,
2517 struct gcc_options *,
2518 struct gcc_options *);
2519 static bool ix86_can_inline_p (tree, tree);
2520 static void ix86_set_current_function (tree);
2521 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2523 static enum calling_abi ix86_function_abi (const_tree);
2526 #ifndef SUBTARGET32_DEFAULT_CPU
2527 #define SUBTARGET32_DEFAULT_CPU "i386"
2528 #endif
2530 /* Whether -mtune= or -march= were specified */
2531 static int ix86_tune_defaulted;
2532 static int ix86_arch_specified;
2534 /* Vectorization library interface and handlers. */
2535 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2537 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2538 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2540 /* Processor target table, indexed by processor number */
2541 struct ptt
2543 const char *const name; /* processor name */
2544 const struct processor_costs *cost; /* Processor costs */
2545 const int align_loop; /* Default alignments. */
2546 const int align_loop_max_skip;
2547 const int align_jump;
2548 const int align_jump_max_skip;
2549 const int align_func;
2552 /* This table must be in sync with enum processor_type in i386.h. */
2553 static const struct ptt processor_target_table[PROCESSOR_max] =
2555 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2556 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2557 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2558 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2559 {"iamcu", &iamcu_cost, 16, 7, 16, 7, 16},
2560 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2561 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2562 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2563 {"core2", &core_cost, 16, 10, 16, 10, 16},
2564 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2565 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2566 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2567 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2568 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2569 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2570 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2571 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2572 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2573 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2574 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2575 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2576 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2577 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2578 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2579 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2580 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2581 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2584 static unsigned int
2585 rest_of_handle_insert_vzeroupper (void)
2587 int i;
2589 /* vzeroupper instructions are inserted immediately after reload to
2590 account for possible spills from 256-bit registers. The pass
2591 reuses the mode switching infrastructure by re-running the mode insertion
2592 pass, so disable the entities that have already been processed. */
2593 for (i = 0; i < MAX_386_ENTITIES; i++)
2594 ix86_optimize_mode_switching[i] = 0;
2596 ix86_optimize_mode_switching[AVX_U128] = 1;
2598 /* Call optimize_mode_switching. */
2599 g->get_passes ()->execute_pass_mode_switching ();
2600 return 0;
2603 namespace {
2605 const pass_data pass_data_insert_vzeroupper =
2607 RTL_PASS, /* type */
2608 "vzeroupper", /* name */
2609 OPTGROUP_NONE, /* optinfo_flags */
2610 TV_NONE, /* tv_id */
2611 0, /* properties_required */
2612 0, /* properties_provided */
2613 0, /* properties_destroyed */
2614 0, /* todo_flags_start */
2615 TODO_df_finish, /* todo_flags_finish */
2618 class pass_insert_vzeroupper : public rtl_opt_pass
2620 public:
2621 pass_insert_vzeroupper(gcc::context *ctxt)
2622 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2625 /* opt_pass methods: */
2626 virtual bool gate (function *)
2628 return TARGET_AVX && !TARGET_AVX512F
2629 && TARGET_VZEROUPPER && flag_expensive_optimizations
2630 && !optimize_size;
2633 virtual unsigned int execute (function *)
2635 return rest_of_handle_insert_vzeroupper ();
2638 }; // class pass_insert_vzeroupper
2640 } // anon namespace
2642 rtl_opt_pass *
2643 make_pass_insert_vzeroupper (gcc::context *ctxt)
2645 return new pass_insert_vzeroupper (ctxt);
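/* make_pass_insert_vzeroupper is only a factory; the pass still has to be
   handed to the pass manager.  A sketch of the registration (the actual
   call is made during option overriding elsewhere in this file):

     opt_pass *p = make_pass_insert_vzeroupper (g);
     struct register_pass_info insert_vzeroupper_info
       = { p, "reload", 1, PASS_POS_INSERT_AFTER };
     register_pass (&insert_vzeroupper_info);

   i.e. a single instance inserted right after reload, matching the comment
   in rest_of_handle_insert_vzeroupper above.  */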
2648 /* Return true if a red-zone is in use. */
2650 static inline bool
2651 ix86_using_red_zone (void)
2653 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
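/* For reference (SysV x86-64 only): the red zone is the 128 bytes below
   %rsp that a leaf function may use without adjusting the stack pointer,
   e.g.

       movl  %edi, -4(%rsp)   # spill without touching %rsp
       ...
       ret

   The MS ABI defines no such area, hence the !TARGET_64BIT_MS_ABI test
   above.  */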
2656 /* Return a string that documents the current -m options. The caller is
2657 responsible for freeing the string. */
2659 static char *
2660 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2661 const char *tune, enum fpmath_unit fpmath,
2662 bool add_nl_p)
2664 struct ix86_target_opts
2666 const char *option; /* option string */
2667 HOST_WIDE_INT mask; /* isa mask options */
2670 /* This table is ordered so that options like -msse4.2, which imply
2671 preceding options, are matched first. */
2672 static struct ix86_target_opts isa_opts[] =
2674 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2675 { "-mfma", OPTION_MASK_ISA_FMA },
2676 { "-mxop", OPTION_MASK_ISA_XOP },
2677 { "-mlwp", OPTION_MASK_ISA_LWP },
2678 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2679 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2680 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2681 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2682 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2683 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2684 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2685 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2686 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2687 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2688 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2689 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2690 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2691 { "-msse3", OPTION_MASK_ISA_SSE3 },
2692 { "-msse2", OPTION_MASK_ISA_SSE2 },
2693 { "-msse", OPTION_MASK_ISA_SSE },
2694 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2695 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2696 { "-mmmx", OPTION_MASK_ISA_MMX },
2697 { "-mabm", OPTION_MASK_ISA_ABM },
2698 { "-mbmi", OPTION_MASK_ISA_BMI },
2699 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2700 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2701 { "-mhle", OPTION_MASK_ISA_HLE },
2702 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2703 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2704 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2705 { "-madx", OPTION_MASK_ISA_ADX },
2706 { "-mtbm", OPTION_MASK_ISA_TBM },
2707 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2708 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2709 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2710 { "-maes", OPTION_MASK_ISA_AES },
2711 { "-msha", OPTION_MASK_ISA_SHA },
2712 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2713 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2714 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2715 { "-mf16c", OPTION_MASK_ISA_F16C },
2716 { "-mrtm", OPTION_MASK_ISA_RTM },
2717 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2718 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2719 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2720 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2721 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2722 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2723 { "-mmpx", OPTION_MASK_ISA_MPX },
2724 { "-mclwb", OPTION_MASK_ISA_CLWB },
2725 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2726 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2729 /* Flag options. */
2730 static struct ix86_target_opts flag_opts[] =
2732 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2733 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2734 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2735 { "-m80387", MASK_80387 },
2736 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2737 { "-malign-double", MASK_ALIGN_DOUBLE },
2738 { "-mcld", MASK_CLD },
2739 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2740 { "-mieee-fp", MASK_IEEE_FP },
2741 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2742 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2743 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2744 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2745 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2746 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2747 { "-mno-red-zone", MASK_NO_RED_ZONE },
2748 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2749 { "-mrecip", MASK_RECIP },
2750 { "-mrtd", MASK_RTD },
2751 { "-msseregparm", MASK_SSEREGPARM },
2752 { "-mstack-arg-probe", MASK_STACK_PROBE },
2753 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2754 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2755 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2756 { "-mvzeroupper", MASK_VZEROUPPER },
2757 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2758 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2759 { "-mprefer-avx128", MASK_PREFER_AVX128},
2762 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2764 char isa_other[40];
2765 char target_other[40];
2766 unsigned num = 0;
2767 unsigned i, j;
2768 char *ret;
2769 char *ptr;
2770 size_t len;
2771 size_t line_len;
2772 size_t sep_len;
2773 const char *abi;
2775 memset (opts, '\0', sizeof (opts));
2777 /* Add -march= option. */
2778 if (arch)
2780 opts[num][0] = "-march=";
2781 opts[num++][1] = arch;
2784 /* Add -mtune= option. */
2785 if (tune)
2787 opts[num][0] = "-mtune=";
2788 opts[num++][1] = tune;
2791 /* Add -m32/-m64/-mx32. */
2792 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2794 if ((isa & OPTION_MASK_ABI_64) != 0)
2795 abi = "-m64";
2796 else
2797 abi = "-mx32";
2798 isa &= ~ (OPTION_MASK_ISA_64BIT
2799 | OPTION_MASK_ABI_64
2800 | OPTION_MASK_ABI_X32);
2802 else
2803 abi = "-m32";
2804 opts[num++][0] = abi;
2806 /* Pick out the options in isa options. */
2807 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2809 if ((isa & isa_opts[i].mask) != 0)
2811 opts[num++][0] = isa_opts[i].option;
2812 isa &= ~ isa_opts[i].mask;
2816 if (isa && add_nl_p)
2818 opts[num++][0] = isa_other;
2819 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2820 isa);
2823 /* Add flag options. */
2824 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2826 if ((flags & flag_opts[i].mask) != 0)
2828 opts[num++][0] = flag_opts[i].option;
2829 flags &= ~ flag_opts[i].mask;
2833 if (flags && add_nl_p)
2835 opts[num++][0] = target_other;
2836 sprintf (target_other, "(other flags: %#x)", flags);
2839 /* Add -fpmath= option. */
2840 if (fpmath)
2842 opts[num][0] = "-mfpmath=";
2843 switch ((int) fpmath)
2845 case FPMATH_387:
2846 opts[num++][1] = "387";
2847 break;
2849 case FPMATH_SSE:
2850 opts[num++][1] = "sse";
2851 break;
2853 case FPMATH_387 | FPMATH_SSE:
2854 opts[num++][1] = "sse+387";
2855 break;
2857 default:
2858 gcc_unreachable ();
2862 /* Any options? */
2863 if (num == 0)
2864 return NULL;
2866 gcc_assert (num < ARRAY_SIZE (opts));
2868 /* Size the string. */
2869 len = 0;
2870 sep_len = (add_nl_p) ? 3 : 1;
2871 for (i = 0; i < num; i++)
2873 len += sep_len;
2874 for (j = 0; j < 2; j++)
2875 if (opts[i][j])
2876 len += strlen (opts[i][j]);
2879 /* Build the string. */
2880 ret = ptr = (char *) xmalloc (len);
2881 line_len = 0;
2883 for (i = 0; i < num; i++)
2885 size_t len2[2];
2887 for (j = 0; j < 2; j++)
2888 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2890 if (i != 0)
2892 *ptr++ = ' ';
2893 line_len++;
2895 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2897 *ptr++ = '\\';
2898 *ptr++ = '\n';
2899 line_len = 0;
2903 for (j = 0; j < 2; j++)
2904 if (opts[i][j])
2906 memcpy (ptr, opts[i][j], len2[j]);
2907 ptr += len2[j];
2908 line_len += len2[j];
2912 *ptr = '\0';
2913 gcc_assert (ret + len >= ptr);
2915 return ret;
2918 /* Return true if profiling code should be emitted before the
2919 prologue, otherwise false.
2920 Note: on x86 this is the case only for -mfentry ("hotfix"-style) profiling. */
2921 static bool
2922 ix86_profile_before_prologue (void)
2924 return flag_fentry != 0;
2927 /* Function that is callable from the debugger to print the current
2928 options. */
2929 void ATTRIBUTE_UNUSED
2930 ix86_debug_options (void)
2932 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2933 ix86_arch_string, ix86_tune_string,
2934 ix86_fpmath, true);
2936 if (opts)
2938 fprintf (stderr, "%s\n\n", opts);
2939 free (opts);
2941 else
2942 fputs ("<no options>\n\n", stderr);
2944 return;
2947 static const char *stringop_alg_names[] = {
2948 #define DEF_ENUM
2949 #define DEF_ALG(alg, name) #name,
2950 #include "stringop.def"
2951 #undef DEF_ENUM
2952 #undef DEF_ALG
2955 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2956 The string is of the following form (or a comma-separated list of such entries):
2958 strategy_alg:max_size:[align|noalign]
2960 where the full size range for the strategy is either [0, max_size] or
2961 [min_size, max_size], in which min_size is the max_size + 1 of the
2962 preceding range. The last size range must have max_size == -1.
2964 Examples:
2967 -mmemcpy-strategy=libcall:-1:noalign
2969 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2973 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2975 This is to tell the compiler to use the following strategy for memset
2976 1) when the expected size is between [1, 16], use rep_8byte strategy;
2977 2) when the size is between [17, 2048], use vector_loop;
2978 3) when the size is > 2048, use libcall. */
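/* For instance (a sketch of what the parser below produces), the memset
   example above ends up as three stringop_size_range entries:

     { 16,   rep_prefix_8_byte, true  }   <- rep_8byte:16:noalign
     { 2048, vector_loop,       false }   <- vector_loop:2048:align
     { -1,   libcall,           true  }   <- libcall:-1:noalign

   which are then written over the default algorithm table selected by the
   current -mtune setting.  */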
2980 struct stringop_size_range
2982 int max;
2983 stringop_alg alg;
2984 bool noalign;
2987 static void
2988 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2990 const struct stringop_algs *default_algs;
2991 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2992 char *curr_range_str, *next_range_str;
2993 int i = 0, n = 0;
2995 if (is_memset)
2996 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2997 else
2998 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3000 curr_range_str = strategy_str;
3004 int maxs;
3005 char alg_name[128];
3006 char align[16];
3007 next_range_str = strchr (curr_range_str, ',');
3008 if (next_range_str)
3009 *next_range_str++ = '\0';
3011 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
3012 alg_name, &maxs, align))
3014 error ("wrong arg %s to option %s", curr_range_str,
3015 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
3016 return;
3019 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3021 error ("size ranges of option %s should be increasing",
3022 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3023 return;
3026 for (i = 0; i < last_alg; i++)
3027 if (!strcmp (alg_name, stringop_alg_names[i]))
3028 break;
3030 if (i == last_alg)
3032 error ("wrong stringop strategy name %s specified for option %s",
3033 alg_name,
3034 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3035 return;
3038 if ((stringop_alg) i == rep_prefix_8_byte
3039 && !TARGET_64BIT)
3041 /* rep; movq isn't available in 32-bit code. */
3042 error ("stringop strategy name %s specified for option %s "
3043 "not supported for 32-bit code",
3044 alg_name,
3045 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3046 return;
3049 input_ranges[n].max = maxs;
3050 input_ranges[n].alg = (stringop_alg) i;
3051 if (!strcmp (align, "align"))
3052 input_ranges[n].noalign = false;
3053 else if (!strcmp (align, "noalign"))
3054 input_ranges[n].noalign = true;
3055 else
3057 error ("unknown alignment %s specified for option %s",
3058 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3059 return;
3061 n++;
3062 curr_range_str = next_range_str;
3064 while (curr_range_str);
3066 if (input_ranges[n - 1].max != -1)
3068 error ("the max value for the last size range should be -1"
3069 " for option %s",
3070 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3071 return;
3074 if (n > MAX_STRINGOP_ALGS)
3076 error ("too many size ranges specified in option %s",
3077 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3078 return;
3081 /* Now override the default algs array. */
3082 for (i = 0; i < n; i++)
3084 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3085 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3086 = input_ranges[i].alg;
3087 *const_cast<int *>(&default_algs->size[i].noalign)
3088 = input_ranges[i].noalign;
3093 /* Parse the -mtune-ctrl= option. When DUMP is true,
3094 print the features that are explicitly set. */
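/* Illustrative example (assuming the feature names from x86-tune.def):
   -mtune-ctrl=use_leave,^accumulate_outgoing_args sets X86_TUNE_USE_LEAVE
   and clears X86_TUNE_ACCUMULATE_OUTGOING_ARGS, since a leading '^'
   negates the named feature.  */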
3096 static void
3097 parse_mtune_ctrl_str (bool dump)
3099 if (!ix86_tune_ctrl_string)
3100 return;
3102 char *next_feature_string = NULL;
3103 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3104 char *orig = curr_feature_string;
3105 int i;
3108 bool clear = false;
3110 next_feature_string = strchr (curr_feature_string, ',');
3111 if (next_feature_string)
3112 *next_feature_string++ = '\0';
3113 if (*curr_feature_string == '^')
3115 curr_feature_string++;
3116 clear = true;
3118 for (i = 0; i < X86_TUNE_LAST; i++)
3120 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3122 ix86_tune_features[i] = !clear;
3123 if (dump)
3124 fprintf (stderr, "Explicitly %s feature %s\n",
3125 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3126 break;
3129 if (i == X86_TUNE_LAST)
3130 error ("Unknown parameter to option -mtune-ctrl: %s",
3131 clear ? curr_feature_string - 1 : curr_feature_string);
3132 curr_feature_string = next_feature_string;
3134 while (curr_feature_string);
3135 free (orig);
3138 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3139 processor type. */
3141 static void
3142 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3144 unsigned int ix86_tune_mask = 1u << ix86_tune;
3145 int i;
3147 for (i = 0; i < X86_TUNE_LAST; ++i)
3149 if (ix86_tune_no_default)
3150 ix86_tune_features[i] = 0;
3151 else
3152 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3155 if (dump)
3157 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3158 for (i = 0; i < X86_TUNE_LAST; i++)
3159 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3160 ix86_tune_features[i] ? "on" : "off");
3163 parse_mtune_ctrl_str (dump);
3167 /* Default align_* from the processor table. */
3169 static void
3170 ix86_default_align (struct gcc_options *opts)
3172 if (opts->x_align_loops == 0)
3174 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3175 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3177 if (opts->x_align_jumps == 0)
3179 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3180 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3182 if (opts->x_align_functions == 0)
3184 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3188 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3190 static void
3191 ix86_override_options_after_change (void)
3193 ix86_default_align (&global_options);
3196 /* Override various settings based on options. If MAIN_ARGS_P, the
3197 options are from the command line, otherwise they are from
3198 attributes. */
3200 static void
3201 ix86_option_override_internal (bool main_args_p,
3202 struct gcc_options *opts,
3203 struct gcc_options *opts_set)
3205 int i;
3206 unsigned int ix86_arch_mask;
3207 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3208 const char *prefix;
3209 const char *suffix;
3210 const char *sw;
3212 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3213 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3214 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3215 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3216 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3217 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3218 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3219 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3220 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3221 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3222 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3223 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3224 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3225 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3226 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3227 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3228 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3229 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3230 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3231 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3232 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3233 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3234 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3235 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3236 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3237 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3238 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3239 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3240 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3241 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3242 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3243 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3244 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3245 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3246 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3247 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3248 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3249 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3250 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3251 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3252 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3253 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3254 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3255 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3256 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3257 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3258 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3259 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3260 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3261 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3262 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3263 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3264 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3265 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3266 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3267 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3268 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3269 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
3271 #define PTA_CORE2 \
3272 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3273 | PTA_CX16 | PTA_FXSR)
3274 #define PTA_NEHALEM \
3275 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3276 #define PTA_WESTMERE \
3277 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3278 #define PTA_SANDYBRIDGE \
3279 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3280 #define PTA_IVYBRIDGE \
3281 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3282 #define PTA_HASWELL \
3283 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3284 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3285 #define PTA_BROADWELL \
3286 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3287 #define PTA_SKYLAKE \
3288 (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
3289 #define PTA_KNL \
3290 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3291 #define PTA_BONNELL \
3292 (PTA_CORE2 | PTA_MOVBE)
3293 #define PTA_SILVERMONT \
3294 (PTA_WESTMERE | PTA_MOVBE)
3296 /* If this reaches 64, we need to widen the struct pta flags field below. */
3298 static struct pta
3300 const char *const name; /* processor name or nickname. */
3301 const enum processor_type processor;
3302 const enum attr_cpu schedule;
3303 const unsigned HOST_WIDE_INT flags;
3305 const processor_alias_table[] =
3307 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3308 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3309 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3310 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3311 {"iamcu", PROCESSOR_IAMCU, CPU_PENTIUM, 0},
3312 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3313 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3314 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3315 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3316 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3317 PTA_MMX | PTA_SSE | PTA_FXSR},
3318 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3319 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3320 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3321 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3322 PTA_MMX | PTA_SSE | PTA_FXSR},
3323 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3324 PTA_MMX | PTA_SSE | PTA_FXSR},
3325 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3326 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3327 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3328 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3329 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3330 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3331 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3332 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3333 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3334 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3335 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3336 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3337 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3338 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3339 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3340 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3341 PTA_SANDYBRIDGE},
3342 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3343 PTA_SANDYBRIDGE},
3344 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3345 PTA_IVYBRIDGE},
3346 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3347 PTA_IVYBRIDGE},
3348 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3349 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3350 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3351 {"skylake", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_SKYLAKE},
3352 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3353 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3354 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3355 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3356 {"knl", PROCESSOR_KNL, CPU_SLM, PTA_KNL},
3357 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3358 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3359 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3360 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3361 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3362 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3363 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3364 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3365 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3366 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3367 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3368 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3369 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3370 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3371 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3372 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3373 {"x86-64", PROCESSOR_K8, CPU_K8,
3374 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3375 {"k8", PROCESSOR_K8, CPU_K8,
3376 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3377 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3378 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3379 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3380 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3381 {"opteron", PROCESSOR_K8, CPU_K8,
3382 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3383 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3384 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3385 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3386 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3387 {"athlon64", PROCESSOR_K8, CPU_K8,
3388 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3389 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3390 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3391 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3392 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3393 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3394 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3395 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3396 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3397 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3398 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3399 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3400 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3401 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3402 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3403 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3404 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3405 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3406 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3407 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3408 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3409 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3410 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3411 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3412 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3413 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3414 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3415 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3416 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3417 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3418 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3419 | PTA_XSAVEOPT | PTA_FSGSBASE},
3420 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3421 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3422 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3423 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3424 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3425 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3426 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3427 | PTA_MOVBE | PTA_MWAITX},
3428 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3429 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3430 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3431 | PTA_FXSR | PTA_XSAVE},
3432 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3433 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3434 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3435 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3436 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3437 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3439 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3440 PTA_64BIT
3441 | PTA_HLE /* flags are only used for -march switch. */ },
3444 /* -mrecip options. */
3445 static struct
3447 const char *string; /* option name */
3448 unsigned int mask; /* mask bits to set */
3450 const recip_options[] =
3452 { "all", RECIP_MASK_ALL },
3453 { "none", RECIP_MASK_NONE },
3454 { "div", RECIP_MASK_DIV },
3455 { "sqrt", RECIP_MASK_SQRT },
3456 { "vec-div", RECIP_MASK_VEC_DIV },
3457 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3460 int const pta_size = ARRAY_SIZE (processor_alias_table);
3462 /* Set up prefix/suffix so the error messages refer to either the command
3463 line argument, or the attribute(target). */
3464 if (main_args_p)
3466 prefix = "-m";
3467 suffix = "";
3468 sw = "switch";
3470 else
3472 prefix = "option(\"";
3473 suffix = "\")";
3474 sw = "attribute";
3477 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3478 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3479 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3480 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3481 #ifdef TARGET_BI_ARCH
3482 else
3484 #if TARGET_BI_ARCH == 1
3485 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3486 is on and OPTION_MASK_ABI_X32 is off. We turn off
3487 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3488 -mx32. */
3489 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3490 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3491 #else
3492 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3493 on and OPTION_MASK_ABI_64 is off. We turn off
3494 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3495 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3496 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3497 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3498 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3499 #endif
3500 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3501 && TARGET_IAMCU_P (opts->x_target_flags))
3502 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3503 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3505 #endif
3507 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3509 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3510 OPTION_MASK_ABI_64 for TARGET_X32. */
3511 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3512 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3514 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3515 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3516 | OPTION_MASK_ABI_X32
3517 | OPTION_MASK_ABI_64);
3518 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3520 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3521 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3522 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3523 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3526 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3527 SUBTARGET_OVERRIDE_OPTIONS;
3528 #endif
3530 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3531 SUBSUBTARGET_OVERRIDE_OPTIONS;
3532 #endif
3534 /* -fPIC is the default for x86_64 Darwin (Mach-O). */
3535 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3536 opts->x_flag_pic = 2;
3538 /* Need to check -mtune=generic first. */
3539 if (opts->x_ix86_tune_string)
3541 /* As special support for cross compilers we read -mtune=native
3542 as -mtune=generic. With native compilers we won't see the
3543 -mtune=native, as it was changed by the driver. */
3544 if (!strcmp (opts->x_ix86_tune_string, "native"))
3546 opts->x_ix86_tune_string = "generic";
3548 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3549 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3550 "%stune=k8%s or %stune=generic%s instead as appropriate",
3551 prefix, suffix, prefix, suffix, prefix, suffix);
3553 else
3555 if (opts->x_ix86_arch_string)
3556 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3557 if (!opts->x_ix86_tune_string)
3559 opts->x_ix86_tune_string
3560 = processor_target_table[TARGET_CPU_DEFAULT].name;
3561 ix86_tune_defaulted = 1;
3564 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3565 or defaulted. We need to use a sensible tune option. */
3566 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3568 opts->x_ix86_tune_string = "generic";
3572 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3573 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3575 /* rep; movq isn't available in 32-bit code. */
3576 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3577 opts->x_ix86_stringop_alg = no_stringop;
3580 if (!opts->x_ix86_arch_string)
3581 opts->x_ix86_arch_string
3582 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3583 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3584 else
3585 ix86_arch_specified = 1;
3587 if (opts_set->x_ix86_pmode)
3589 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3590 && opts->x_ix86_pmode == PMODE_SI)
3591 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3592 && opts->x_ix86_pmode == PMODE_DI))
3593 error ("address mode %qs not supported in the %s bit mode",
3594 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3595 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3597 else
3598 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3599 ? PMODE_DI : PMODE_SI;
3601 if (!opts_set->x_ix86_abi)
3602 opts->x_ix86_abi = DEFAULT_ABI;
3604 /* For targets using the MS ABI, enable MS extensions if they have
3605 not been explicitly turned off. For non-MS-ABI targets we turn
3606 this option off. */
3607 if (!opts_set->x_flag_ms_extensions)
3608 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3610 if (opts_set->x_ix86_cmodel)
3612 switch (opts->x_ix86_cmodel)
3614 case CM_SMALL:
3615 case CM_SMALL_PIC:
3616 if (opts->x_flag_pic)
3617 opts->x_ix86_cmodel = CM_SMALL_PIC;
3618 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3619 error ("code model %qs not supported in the %s bit mode",
3620 "small", "32");
3621 break;
3623 case CM_MEDIUM:
3624 case CM_MEDIUM_PIC:
3625 if (opts->x_flag_pic)
3626 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3627 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3628 error ("code model %qs not supported in the %s bit mode",
3629 "medium", "32");
3630 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3631 error ("code model %qs not supported in x32 mode",
3632 "medium");
3633 break;
3635 case CM_LARGE:
3636 case CM_LARGE_PIC:
3637 if (opts->x_flag_pic)
3638 opts->x_ix86_cmodel = CM_LARGE_PIC;
3639 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3640 error ("code model %qs not supported in the %s bit mode",
3641 "large", "32");
3642 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3643 error ("code model %qs not supported in x32 mode",
3644 "large");
3645 break;
3647 case CM_32:
3648 if (opts->x_flag_pic)
3649 error ("code model %s does not support PIC mode", "32");
3650 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3651 error ("code model %qs not supported in the %s bit mode",
3652 "32", "64");
3653 break;
3655 case CM_KERNEL:
3656 if (opts->x_flag_pic)
3658 error ("code model %s does not support PIC mode", "kernel");
3659 opts->x_ix86_cmodel = CM_32;
3661 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3662 error ("code model %qs not supported in the %s bit mode",
3663 "kernel", "32");
3664 break;
3666 default:
3667 gcc_unreachable ();
3670 else
3672 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3673 use of rip-relative addressing. This eliminates fixups that
3674 would otherwise be needed if this object is to be placed in a
3675 DLL, and is essentially just as efficient as direct addressing. */
3676 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3677 && (TARGET_RDOS || TARGET_PECOFF))
3678 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3679 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3680 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3681 else
3682 opts->x_ix86_cmodel = CM_32;
3684 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3686 error ("-masm=intel not supported in this configuration");
3687 opts->x_ix86_asm_dialect = ASM_ATT;
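/* Sorry if the requested 32-/64-bit mode was not compiled into this
   compiler (e.g. a 32-bit-only build asked for -m64).  */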
3689 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3690 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3691 sorry ("%i-bit mode not compiled in",
3692 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3694 for (i = 0; i < pta_size; i++)
3695 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3697 ix86_schedule = processor_alias_table[i].schedule;
3698 ix86_arch = processor_alias_table[i].processor;
3699 /* Default cpu tuning to the architecture. */
3700 ix86_tune = ix86_arch;
3702 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3703 && !(processor_alias_table[i].flags & PTA_64BIT))
3704 error ("CPU you selected does not support x86-64 "
3705 "instruction set");
3707 if (processor_alias_table[i].flags & PTA_MMX
3708 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3709 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3710 if (processor_alias_table[i].flags & PTA_3DNOW
3711 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3712 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3713 if (processor_alias_table[i].flags & PTA_3DNOW_A
3714 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3715 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3716 if (processor_alias_table[i].flags & PTA_SSE
3717 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3718 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3719 if (processor_alias_table[i].flags & PTA_SSE2
3720 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3721 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3722 if (processor_alias_table[i].flags & PTA_SSE3
3723 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3724 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3725 if (processor_alias_table[i].flags & PTA_SSSE3
3726 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3727 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3728 if (processor_alias_table[i].flags & PTA_SSE4_1
3729 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3730 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3731 if (processor_alias_table[i].flags & PTA_SSE4_2
3732 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3733 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3734 if (processor_alias_table[i].flags & PTA_AVX
3735 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3736 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3737 if (processor_alias_table[i].flags & PTA_AVX2
3738 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3739 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3740 if (processor_alias_table[i].flags & PTA_FMA
3741 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3742 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3743 if (processor_alias_table[i].flags & PTA_SSE4A
3744 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3745 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3746 if (processor_alias_table[i].flags & PTA_FMA4
3747 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3748 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3749 if (processor_alias_table[i].flags & PTA_XOP
3750 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3751 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3752 if (processor_alias_table[i].flags & PTA_LWP
3753 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3754 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3755 if (processor_alias_table[i].flags & PTA_ABM
3756 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3757 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3758 if (processor_alias_table[i].flags & PTA_BMI
3759 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3760 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
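/* AMD's ABM extension implies LZCNT, so PTA_ABM also enables it.  */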
3761 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3762 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3763 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3764 if (processor_alias_table[i].flags & PTA_TBM
3765 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3766 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3767 if (processor_alias_table[i].flags & PTA_BMI2
3768 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3769 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3770 if (processor_alias_table[i].flags & PTA_CX16
3771 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3772 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
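/* Likewise, ABM implies POPCNT.  */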
3773 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3774 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3775 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3776 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3777 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3778 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3779 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3780 if (processor_alias_table[i].flags & PTA_MOVBE
3781 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3782 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3783 if (processor_alias_table[i].flags & PTA_AES
3784 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3785 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3786 if (processor_alias_table[i].flags & PTA_SHA
3787 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3788 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3789 if (processor_alias_table[i].flags & PTA_PCLMUL
3790 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3791 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3792 if (processor_alias_table[i].flags & PTA_FSGSBASE
3793 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3794 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3795 if (processor_alias_table[i].flags & PTA_RDRND
3796 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3797 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3798 if (processor_alias_table[i].flags & PTA_F16C
3799 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3800 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3801 if (processor_alias_table[i].flags & PTA_RTM
3802 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3803 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3804 if (processor_alias_table[i].flags & PTA_HLE
3805 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3806 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3807 if (processor_alias_table[i].flags & PTA_PRFCHW
3808 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3809 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3810 if (processor_alias_table[i].flags & PTA_RDSEED
3811 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3812 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3813 if (processor_alias_table[i].flags & PTA_ADX
3814 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3815 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3816 if (processor_alias_table[i].flags & PTA_FXSR
3817 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3818 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3819 if (processor_alias_table[i].flags & PTA_XSAVE
3820 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3821 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3822 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3823 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3824 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3825 if (processor_alias_table[i].flags & PTA_AVX512F
3826 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3827 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3828 if (processor_alias_table[i].flags & PTA_AVX512ER
3829 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3830 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3831 if (processor_alias_table[i].flags & PTA_AVX512PF
3832 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3833 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3834 if (processor_alias_table[i].flags & PTA_AVX512CD
3835 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3836 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3837 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3838 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3839 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3840 if (processor_alias_table[i].flags & PTA_PCOMMIT
3841 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3842 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3843 if (processor_alias_table[i].flags & PTA_CLWB
3844 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3845 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3846 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3847 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3848 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3849 if (processor_alias_table[i].flags & PTA_XSAVEC
3850 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3851 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3852 if (processor_alias_table[i].flags & PTA_XSAVES
3853 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3854 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3855 if (processor_alias_table[i].flags & PTA_AVX512DQ
3856 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3857 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3858 if (processor_alias_table[i].flags & PTA_AVX512BW
3859 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3860 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3861 if (processor_alias_table[i].flags & PTA_AVX512VL
3862 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3863 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3864 if (processor_alias_table[i].flags & PTA_MPX
3865 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3866 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3867 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3868 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3869 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3870 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3871 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3872 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3873 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3874 x86_prefetch_sse = true;
3875 if (processor_alias_table[i].flags & PTA_MWAITX
3876 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
3877 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
3879 break;
3882 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3883 error ("Intel MPX does not support x32");
3888 if (TARGET_IAMCU_P (opts->x_target_flags))
3890 /* Verify that x87/MMX/SSE/AVX is off for -miamcu. */
3891 if (TARGET_80387_P (opts->x_target_flags))
3892 sorry ("X87 FPU isn%'t supported in Intel MCU psABI");
3893 else if ((opts->x_ix86_isa_flags & (OPTION_MASK_ISA_MMX
3894 | OPTION_MASK_ISA_SSE
3895 | OPTION_MASK_ISA_AVX)))
3896 sorry ("%s isn%'t supported in Intel MCU psABI",
3897 TARGET_MMX_P (opts->x_ix86_isa_flags)
3898 ? "MMX"
3899 : TARGET_SSE_P (opts->x_ix86_isa_flags) ? "SSE" : "AVX");
3902 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3903 error ("generic CPU can be used only for %stune=%s %s",
3904 prefix, suffix, sw);
3905 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3906 error ("intel CPU can be used only for %stune=%s %s",
3907 prefix, suffix, sw);
3908 else if (i == pta_size)
3909 error ("bad value (%s) for %sarch=%s %s",
3910 opts->x_ix86_arch_string, prefix, suffix, sw);
3912 ix86_arch_mask = 1u << ix86_arch;
3913 for (i = 0; i < X86_ARCH_LAST; ++i)
3914 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3916 for (i = 0; i < pta_size; i++)
3917 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3919 ix86_schedule = processor_alias_table[i].schedule;
3920 ix86_tune = processor_alias_table[i].processor;
3921 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3923 if (!(processor_alias_table[i].flags & PTA_64BIT))
3925 if (ix86_tune_defaulted)
3927 opts->x_ix86_tune_string = "x86-64";
3928 for (i = 0; i < pta_size; i++)
3929 if (! strcmp (opts->x_ix86_tune_string,
3930 processor_alias_table[i].name))
3931 break;
3932 ix86_schedule = processor_alias_table[i].schedule;
3933 ix86_tune = processor_alias_table[i].processor;
3935 else
3936 error ("CPU you selected does not support x86-64 "
3937 "instruction set");
3940 /* Intel CPUs have always interpreted SSE prefetch instructions as
3941 NOPs; so, we can enable SSE prefetch instructions even when
3942 -mtune (rather than -march) points us to a processor that has them.
3943 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3944 higher processors. */
3945 if (TARGET_CMOV
3946 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3947 x86_prefetch_sse = true;
3948 break;
3951 if (ix86_tune_specified && i == pta_size)
3952 error ("bad value (%s) for %stune=%s %s",
3953 opts->x_ix86_tune_string, prefix, suffix, sw);
3955 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3957 #ifndef USE_IX86_FRAME_POINTER
3958 #define USE_IX86_FRAME_POINTER 0
3959 #endif
3961 #ifndef USE_X86_64_FRAME_POINTER
3962 #define USE_X86_64_FRAME_POINTER 0
3963 #endif
3965 /* Set the default values for switches whose default depends on TARGET_64BIT
3966 in case they weren't overwritten by command line options. */
3967 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3969 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3970 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3971 if (opts->x_flag_asynchronous_unwind_tables
3972 && !opts_set->x_flag_unwind_tables
3973 && TARGET_64BIT_MS_ABI)
3974 opts->x_flag_unwind_tables = 1;
3975 if (opts->x_flag_asynchronous_unwind_tables == 2)
3976 opts->x_flag_unwind_tables
3977 = opts->x_flag_asynchronous_unwind_tables = 1;
3978 if (opts->x_flag_pcc_struct_return == 2)
3979 opts->x_flag_pcc_struct_return = 0;
3981 else
3983 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3984 opts->x_flag_omit_frame_pointer
3985 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3986 if (opts->x_flag_asynchronous_unwind_tables == 2)
3987 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3988 if (opts->x_flag_pcc_struct_return == 2)
3990 /* Intel MCU psABI specifies that -freg-struct-return should
3991 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
3992 we check -miamcu so that -freg-struct-return is always
3993 turned on if -miamcu is used. */
3994 if (TARGET_IAMCU_P (opts->x_target_flags))
3995 opts->x_flag_pcc_struct_return = 0;
3996 else
3997 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
4001 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4002 /* TODO: ix86_cost should be chosen at instruction or function granularity,
4003 so that for cold code we use size_cost even in !optimize_size compilations. */
4004 if (opts->x_optimize_size)
4005 ix86_cost = &ix86_size_cost;
4006 else
4007 ix86_cost = ix86_tune_cost;
4009 /* Arrange to set up i386_stack_locals for all functions. */
4010 init_machine_status = ix86_init_machine_status;
4012 /* Validate -mregparm= value. */
4013 if (opts_set->x_ix86_regparm)
4015 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4016 warning (0, "-mregparm is ignored in 64-bit mode");
4017 else if (TARGET_IAMCU_P (opts->x_target_flags))
4018 warning (0, "-mregparm is ignored for Intel MCU psABI");
4019 if (opts->x_ix86_regparm > REGPARM_MAX)
4021 error ("-mregparm=%d is not between 0 and %d",
4022 opts->x_ix86_regparm, REGPARM_MAX);
4023 opts->x_ix86_regparm = 0;
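/* Both the Intel MCU psABI and the 64-bit ABIs pass arguments in
   registers, so use the maximum register-passing count there.  */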
4026 if (TARGET_IAMCU_P (opts->x_target_flags)
4027 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
4028 opts->x_ix86_regparm = REGPARM_MAX;
4030 /* Default align_* from the processor table. */
4031 ix86_default_align (opts);
4033 /* Provide default for -mbranch-cost= value. */
4034 if (!opts_set->x_ix86_branch_cost)
4035 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
4037 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4039 opts->x_target_flags
4040 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
4042 /* Enable by default the SSE and MMX builtins. Do allow the user to
4043 explicitly disable any of these. In particular, disabling SSE and
4044 MMX for kernel code is extremely useful. */
4045 if (!ix86_arch_specified)
4046 opts->x_ix86_isa_flags
4047 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4048 | TARGET_SUBTARGET64_ISA_DEFAULT)
4049 & ~opts->x_ix86_isa_flags_explicit);
4051 if (TARGET_RTD_P (opts->x_target_flags))
4052 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
4054 else
4056 opts->x_target_flags
4057 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
4059 if (!ix86_arch_specified)
4060 opts->x_ix86_isa_flags
4061 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
4063 /* The i386 ABI does not specify a red zone. It still makes sense to use
4064 one when the programmer takes care to keep the stack from being clobbered. */
4065 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4066 opts->x_target_flags |= MASK_NO_RED_ZONE;
4069 /* Keep nonleaf frame pointers. */
4070 if (opts->x_flag_omit_frame_pointer)
4071 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4072 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4073 opts->x_flag_omit_frame_pointer = 1;
4075 /* If we're doing fast math, we don't care about comparison order
4076 wrt NaNs. This lets us use a shorter comparison sequence. */
4077 if (opts->x_flag_finite_math_only)
4078 opts->x_target_flags &= ~MASK_IEEE_FP;
4080 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4081 since the insns won't need emulation. */
4082 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4083 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4085 /* Likewise, if the target doesn't have a 387, or we've specified
4086 software floating point, don't use 387 inline intrinsics. */
4087 if (!TARGET_80387_P (opts->x_target_flags))
4088 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4090 /* Turn on MMX builtins for -msse. */
4091 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4092 opts->x_ix86_isa_flags
4093 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4095 /* Enable SSE prefetch. */
4096 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4097 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4098 x86_prefetch_sse = true;
4100 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
4101 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4102 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4103 opts->x_ix86_isa_flags
4104 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4106 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4107 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4108 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4109 opts->x_ix86_isa_flags
4110 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4112 /* Enable lzcnt instruction for -mabm. */
4113 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
4114 opts->x_ix86_isa_flags
4115 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4117 /* Validate -mpreferred-stack-boundary= value or default it to
4118 PREFERRED_STACK_BOUNDARY_DEFAULT. */
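/* The argument is a power-of-two exponent in bytes; for example,
   -mpreferred-stack-boundary=4 requests 2**4 = 16-byte (128-bit)
   stack alignment.  */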
4119 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4120 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4122 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4123 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4124 int max = (TARGET_SEH ? 4 : 12);
4126 if (opts->x_ix86_preferred_stack_boundary_arg < min
4127 || opts->x_ix86_preferred_stack_boundary_arg > max)
4129 if (min == max)
4130 error ("-mpreferred-stack-boundary is not supported "
4131 "for this target");
4132 else
4133 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4134 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4136 else
4137 ix86_preferred_stack_boundary
4138 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4141 /* Set the default value for -mstackrealign. */
4142 if (opts->x_ix86_force_align_arg_pointer == -1)
4143 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4145 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4147 /* Validate -mincoming-stack-boundary= value or default it to
4148 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4149 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4150 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4152 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4153 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4155 if (opts->x_ix86_incoming_stack_boundary_arg < min
4156 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4157 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4158 opts->x_ix86_incoming_stack_boundary_arg, min);
4159 else
4161 ix86_user_incoming_stack_boundary
4162 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4163 ix86_incoming_stack_boundary
4164 = ix86_user_incoming_stack_boundary;
4168 #ifndef NO_PROFILE_COUNTERS
4169 if (flag_nop_mcount)
4170 error ("-mnop-mcount is not compatible with this target");
4171 #endif
4172 if (flag_nop_mcount && flag_pic)
4173 error ("-mnop-mcount is not implemented for -fPIC");
4175 /* Accept -msseregparm only if at least SSE support is enabled. */
4176 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4177 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4178 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4180 if (opts_set->x_ix86_fpmath)
4182 if (opts->x_ix86_fpmath & FPMATH_SSE)
4184 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4186 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4187 opts->x_ix86_fpmath = FPMATH_387;
4189 else if ((opts->x_ix86_fpmath & FPMATH_387)
4190 && !TARGET_80387_P (opts->x_target_flags))
4192 warning (0, "387 instruction set disabled, using SSE arithmetics");
4193 opts->x_ix86_fpmath = FPMATH_SSE;
4197 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4198 -mfpmath=387. The latter is nevertheless the default on many targets,
4199 since the extra 80-bit precision of temporaries is considered part of
4200 the ABI. Overwrite the default at least for -ffast-math.
4201 TODO: -mfpmath=both seems to produce code that performs the same while
4202 giving slightly smaller binaries. It is however not clear whether
4203 register allocation is ready for this setting.
4204 Also -mfpmath=387 is overall considerably more compact (about 4-5%)
4205 than SSE codegen. We may switch to 387 with -ffast-math for
4206 size-optimized functions. */
4207 else if (fast_math_flags_set_p (&global_options)
4208 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4209 opts->x_ix86_fpmath = FPMATH_SSE;
4210 else
4211 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4213 /* If the i387 is disabled, then do not return values in it. */
4214 if (!TARGET_80387_P (opts->x_target_flags))
4215 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4217 /* Use external vectorized library in vectorizing intrinsics. */
4218 if (opts_set->x_ix86_veclibabi_type)
4219 switch (opts->x_ix86_veclibabi_type)
4221 case ix86_veclibabi_type_svml:
4222 ix86_veclib_handler = ix86_veclibabi_svml;
4223 break;
4225 case ix86_veclibabi_type_acml:
4226 ix86_veclib_handler = ix86_veclibabi_acml;
4227 break;
4229 default:
4230 gcc_unreachable ();
4233 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4234 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4235 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4237 /* If stack probes are required, the space used for large function
4238 arguments on the stack must also be probed, so enable
4239 -maccumulate-outgoing-args so this happens in the prologue. */
4240 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4241 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4243 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4244 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4245 "for correctness", prefix, suffix);
4246 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4249 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4251 char *p;
4252 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4253 p = strchr (internal_label_prefix, 'X');
4254 internal_label_prefix_len = p - internal_label_prefix;
4255 *p = '\0';
4258 /* When the scheduling description is not available, disable the scheduler
4259 pass so it won't slow down the compilation and make x87 code slower. */
4260 if (!TARGET_SCHEDULE)
4261 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4263 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4264 ix86_tune_cost->simultaneous_prefetches,
4265 opts->x_param_values,
4266 opts_set->x_param_values);
4267 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4268 ix86_tune_cost->prefetch_block,
4269 opts->x_param_values,
4270 opts_set->x_param_values);
4271 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4272 ix86_tune_cost->l1_cache_size,
4273 opts->x_param_values,
4274 opts_set->x_param_values);
4275 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4276 ix86_tune_cost->l2_cache_size,
4277 opts->x_param_values,
4278 opts_set->x_param_values);
4280 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4281 if (opts->x_flag_prefetch_loop_arrays < 0
4282 && HAVE_prefetch
4283 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4284 && !opts->x_optimize_size
4285 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4286 opts->x_flag_prefetch_loop_arrays = 1;
4288 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4289 can be optimized to ap = __builtin_next_arg (0). */
4290 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4291 targetm.expand_builtin_va_start = NULL;
4293 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4295 ix86_gen_leave = gen_leave_rex64;
4296 if (Pmode == DImode)
4298 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4299 ix86_gen_tls_local_dynamic_base_64
4300 = gen_tls_local_dynamic_base_64_di;
4302 else
4304 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4305 ix86_gen_tls_local_dynamic_base_64
4306 = gen_tls_local_dynamic_base_64_si;
4309 else
4310 ix86_gen_leave = gen_leave;
4312 if (Pmode == DImode)
4314 ix86_gen_add3 = gen_adddi3;
4315 ix86_gen_sub3 = gen_subdi3;
4316 ix86_gen_sub3_carry = gen_subdi3_carry;
4317 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4318 ix86_gen_andsp = gen_anddi3;
4319 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4320 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4321 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4322 ix86_gen_monitor = gen_sse3_monitor_di;
4323 ix86_gen_monitorx = gen_monitorx_di;
4325 else
4327 ix86_gen_add3 = gen_addsi3;
4328 ix86_gen_sub3 = gen_subsi3;
4329 ix86_gen_sub3_carry = gen_subsi3_carry;
4330 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4331 ix86_gen_andsp = gen_andsi3;
4332 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4333 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4334 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4335 ix86_gen_monitor = gen_sse3_monitor_si;
4336 ix86_gen_monitorx = gen_monitorx_si;
4339 #ifdef USE_IX86_CLD
4340 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4341 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4342 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4343 #endif
4345 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4347 if (opts->x_flag_fentry > 0)
4348 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4349 "with -fpic");
4350 opts->x_flag_fentry = 0;
4352 else if (TARGET_SEH)
4354 if (opts->x_flag_fentry == 0)
4355 sorry ("-mno-fentry isn%'t compatible with SEH");
4356 opts->x_flag_fentry = 1;
4358 else if (opts->x_flag_fentry < 0)
4360 #if defined(PROFILE_BEFORE_PROLOGUE)
4361 opts->x_flag_fentry = 1;
4362 #else
4363 opts->x_flag_fentry = 0;
4364 #endif
4367 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4368 opts->x_target_flags |= MASK_VZEROUPPER;
4369 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4370 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4371 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4372 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4373 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4374 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4375 /* Enable 128-bit AVX instruction generation
4376 for the auto-vectorizer. */
4377 if (TARGET_AVX128_OPTIMAL
4378 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4379 opts->x_target_flags |= MASK_PREFER_AVX128;
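/* Parse -mrecip=...; the value is a comma-separated list such as
   "all,!sqrt" (illustrative), where a leading '!' disables the named
   estimate instead of enabling it.  */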
4381 if (opts->x_ix86_recip_name)
4383 char *p = ASTRDUP (opts->x_ix86_recip_name);
4384 char *q;
4385 unsigned int mask, i;
4386 bool invert;
4388 while ((q = strtok (p, ",")) != NULL)
4390 p = NULL;
4391 if (*q == '!')
4393 invert = true;
4394 q++;
4396 else
4397 invert = false;
4399 if (!strcmp (q, "default"))
4400 mask = RECIP_MASK_ALL;
4401 else
4403 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4404 if (!strcmp (q, recip_options[i].string))
4406 mask = recip_options[i].mask;
4407 break;
4410 if (i == ARRAY_SIZE (recip_options))
4412 error ("unknown option for -mrecip=%s", q);
4413 invert = false;
4414 mask = RECIP_MASK_NONE;
4418 opts->x_recip_mask_explicit |= mask;
4419 if (invert)
4420 opts->x_recip_mask &= ~mask;
4421 else
4422 opts->x_recip_mask |= mask;
4426 if (TARGET_RECIP_P (opts->x_target_flags))
4427 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4428 else if (opts_set->x_target_flags & MASK_RECIP)
4429 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4431 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4432 for 64-bit Bionic. Also default long double to 64-bit for Intel
4433 MCU psABI. */
4434 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4435 && !(opts_set->x_target_flags
4436 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4437 opts->x_target_flags |= (TARGET_64BIT
4438 ? MASK_LONG_DOUBLE_128
4439 : MASK_LONG_DOUBLE_64);
4441 /* Only one of them can be active. */
4442 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4443 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4445 /* Save the initial options in case the user uses function-specific
4446 options. */
4447 if (main_args_p)
4448 target_option_default_node = target_option_current_node
4449 = build_target_option_node (opts);
4451 /* Handle the stack protector. */
4452 if (!opts_set->x_ix86_stack_protector_guard)
4453 opts->x_ix86_stack_protector_guard
4454 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4456 /* Handle -mmemcpy-strategy= and -mmemset-strategy=. */
4457 if (opts->x_ix86_tune_memcpy_strategy)
4459 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4460 ix86_parse_stringop_strategy_string (str, false);
4461 free (str);
4464 if (opts->x_ix86_tune_memset_strategy)
4466 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4467 ix86_parse_stringop_strategy_string (str, true);
4468 free (str);
4472 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4474 static void
4475 ix86_option_override (void)
4477 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4478 struct register_pass_info insert_vzeroupper_info
4479 = { pass_insert_vzeroupper, "reload",
4480 1, PASS_POS_INSERT_AFTER
4483 ix86_option_override_internal (true, &global_options, &global_options_set);
4486 /* This needs to be done at start up. It's convenient to do it here. */
4487 register_pass (&insert_vzeroupper_info);
4490 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4491 static char *
4492 ix86_offload_options (void)
4494 if (TARGET_LP64)
4495 return xstrdup ("-foffload-abi=lp64");
4496 return xstrdup ("-foffload-abi=ilp32");
4499 /* Update register usage after having seen the compiler flags. */
4501 static void
4502 ix86_conditional_register_usage (void)
4504 int i, c_mask;
4506 /* For 32-bit targets, squash the REX registers. */
4507 if (! TARGET_64BIT)
4509 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4510 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4511 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4512 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4513 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4514 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4517 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4518 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4519 : TARGET_64BIT ? (1 << 2)
4520 : (1 << 1));
4522 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4524 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4526 /* Set/reset conditionally defined registers from
4527 CALL_USED_REGISTERS initializer. */
4528 if (call_used_regs[i] > 1)
4529 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4531 /* Calculate registers of CLOBBERED_REGS register set
4532 as call used registers from GENERAL_REGS register set. */
4533 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4534 && call_used_regs[i])
4535 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4538 /* If MMX is disabled, squash the registers. */
4539 if (! TARGET_MMX)
4540 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4541 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4542 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4544 /* If SSE is disabled, squash the registers. */
4545 if (! TARGET_SSE)
4546 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4547 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4548 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4550 /* If the FPU is disabled, squash the registers. */
4551 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4552 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4553 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4554 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4556 /* If AVX512F is disabled, squash the registers. */
4557 if (! TARGET_AVX512F)
4559 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4560 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4562 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4563 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4566 /* If MPX is disabled, squash the registers. */
4567 if (! TARGET_MPX)
4568 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4569 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4573 /* Save the current options */
4575 static void
4576 ix86_function_specific_save (struct cl_target_option *ptr,
4577 struct gcc_options *opts)
4579 ptr->arch = ix86_arch;
4580 ptr->schedule = ix86_schedule;
4581 ptr->prefetch_sse = x86_prefetch_sse;
4582 ptr->tune = ix86_tune;
4583 ptr->branch_cost = ix86_branch_cost;
4584 ptr->tune_defaulted = ix86_tune_defaulted;
4585 ptr->arch_specified = ix86_arch_specified;
4586 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4587 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4588 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4589 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4590 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4591 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4592 ptr->x_ix86_abi = opts->x_ix86_abi;
4593 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4594 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4595 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4596 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4597 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4598 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4599 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4600 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4601 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4602 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4603 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4604 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4605 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4606 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4607 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4608 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4609 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4610 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4611 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4612 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4614 /* The fields are char but the variables are not; make sure the
4615 values fit in the fields. */
4616 gcc_assert (ptr->arch == ix86_arch);
4617 gcc_assert (ptr->schedule == ix86_schedule);
4618 gcc_assert (ptr->tune == ix86_tune);
4619 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4622 /* Restore the current options */
4624 static void
4625 ix86_function_specific_restore (struct gcc_options *opts,
4626 struct cl_target_option *ptr)
4628 enum processor_type old_tune = ix86_tune;
4629 enum processor_type old_arch = ix86_arch;
4630 unsigned int ix86_arch_mask;
4631 int i;
4633 /* We don't change -fPIC. */
4634 opts->x_flag_pic = flag_pic;
4636 ix86_arch = (enum processor_type) ptr->arch;
4637 ix86_schedule = (enum attr_cpu) ptr->schedule;
4638 ix86_tune = (enum processor_type) ptr->tune;
4639 x86_prefetch_sse = ptr->prefetch_sse;
4640 opts->x_ix86_branch_cost = ptr->branch_cost;
4641 ix86_tune_defaulted = ptr->tune_defaulted;
4642 ix86_arch_specified = ptr->arch_specified;
4643 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4644 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4645 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4646 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4647 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4648 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4649 opts->x_ix86_abi = ptr->x_ix86_abi;
4650 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4651 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4652 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4653 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4654 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4655 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4656 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4657 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4658 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4659 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4660 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4661 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4662 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4663 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4664 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4665 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4666 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4667 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4668 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4669 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4670 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4671 /* TODO: ix86_cost should be chosen at instruction or function granularity
4672 so for cold code we use size_cost even in !optimize_size compilation. */
4673 if (opts->x_optimize_size)
4674 ix86_cost = &ix86_size_cost;
4675 else
4676 ix86_cost = ix86_tune_cost;
4678 /* Recreate the arch feature tests if the arch changed */
4679 if (old_arch != ix86_arch)
4681 ix86_arch_mask = 1u << ix86_arch;
4682 for (i = 0; i < X86_ARCH_LAST; ++i)
4683 ix86_arch_features[i]
4684 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4687 /* Recreate the tune optimization tests */
4688 if (old_tune != ix86_tune)
4689 set_ix86_tune_features (ix86_tune, false);
4692 /* Adjust target options after streaming them in. This is mainly about
4693 reconciling them with global options. */
4695 static void
4696 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4698 /* flag_pic is a global option, but ix86_cmodel is target saved option
4699 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4700 for PIC, or error out. */
4701 if (flag_pic)
4702 switch (ptr->x_ix86_cmodel)
4704 case CM_SMALL:
4705 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4706 break;
4708 case CM_MEDIUM:
4709 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4710 break;
4712 case CM_LARGE:
4713 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4714 break;
4716 case CM_KERNEL:
4717 error ("code model %s does not support PIC mode", "kernel");
4718 break;
4720 default:
4721 break;
4723 else
4724 switch (ptr->x_ix86_cmodel)
4726 case CM_SMALL_PIC:
4727 ptr->x_ix86_cmodel = CM_SMALL;
4728 break;
4730 case CM_MEDIUM_PIC:
4731 ptr->x_ix86_cmodel = CM_MEDIUM;
4732 break;
4734 case CM_LARGE_PIC:
4735 ptr->x_ix86_cmodel = CM_LARGE;
4736 break;
4738 default:
4739 break;
4743 /* Print the current options */
4745 static void
4746 ix86_function_specific_print (FILE *file, int indent,
4747 struct cl_target_option *ptr)
4749 char *target_string
4750 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4751 NULL, NULL, ptr->x_ix86_fpmath, false);
4753 gcc_assert (ptr->arch < PROCESSOR_max);
4754 fprintf (file, "%*sarch = %d (%s)\n",
4755 indent, "",
4756 ptr->arch, processor_target_table[ptr->arch].name);
4758 gcc_assert (ptr->tune < PROCESSOR_max);
4759 fprintf (file, "%*stune = %d (%s)\n",
4760 indent, "",
4761 ptr->tune, processor_target_table[ptr->tune].name);
4763 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4765 if (target_string)
4767 fprintf (file, "%*s%s\n", indent, "", target_string);
4768 free (target_string);
4773 /* Inner function to process the attribute((target(...))): take an argument and
4774 set the current options from the argument. If we have a list, recursively go
4775 over the list. */
4777 static bool
4778 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4779 struct gcc_options *opts,
4780 struct gcc_options *opts_set,
4781 struct gcc_options *enum_opts_set)
4783 char *next_optstr;
4784 bool ret = true;
4786 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4787 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4788 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4789 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4790 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4792 enum ix86_opt_type
4794 ix86_opt_unknown,
4795 ix86_opt_yes,
4796 ix86_opt_no,
4797 ix86_opt_str,
4798 ix86_opt_enum,
4799 ix86_opt_isa
4802 static const struct
4804 const char *string;
4805 size_t len;
4806 enum ix86_opt_type type;
4807 int opt;
4808 int mask;
4809 } attrs[] = {
4810 /* isa options */
4811 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4812 IX86_ATTR_ISA ("abm", OPT_mabm),
4813 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4814 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4815 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4816 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4817 IX86_ATTR_ISA ("aes", OPT_maes),
4818 IX86_ATTR_ISA ("sha", OPT_msha),
4819 IX86_ATTR_ISA ("avx", OPT_mavx),
4820 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4821 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4822 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4823 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4824 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4825 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4826 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4827 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4828 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4829 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4830 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4831 IX86_ATTR_ISA ("sse", OPT_msse),
4832 IX86_ATTR_ISA ("sse2", OPT_msse2),
4833 IX86_ATTR_ISA ("sse3", OPT_msse3),
4834 IX86_ATTR_ISA ("sse4", OPT_msse4),
4835 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4836 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4837 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4838 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4839 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4840 IX86_ATTR_ISA ("fma", OPT_mfma),
4841 IX86_ATTR_ISA ("xop", OPT_mxop),
4842 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4843 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4844 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4845 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4846 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4847 IX86_ATTR_ISA ("hle", OPT_mhle),
4848 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4849 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4850 IX86_ATTR_ISA ("adx", OPT_madx),
4851 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4852 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4853 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4854 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4855 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4856 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4857 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4858 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4859 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4860 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4861 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4862 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
4864 /* enum options */
4865 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4867 /* string options */
4868 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4869 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4871 /* flag options */
4872 IX86_ATTR_YES ("cld",
4873 OPT_mcld,
4874 MASK_CLD),
4876 IX86_ATTR_NO ("fancy-math-387",
4877 OPT_mfancy_math_387,
4878 MASK_NO_FANCY_MATH_387),
4880 IX86_ATTR_YES ("ieee-fp",
4881 OPT_mieee_fp,
4882 MASK_IEEE_FP),
4884 IX86_ATTR_YES ("inline-all-stringops",
4885 OPT_minline_all_stringops,
4886 MASK_INLINE_ALL_STRINGOPS),
4888 IX86_ATTR_YES ("inline-stringops-dynamically",
4889 OPT_minline_stringops_dynamically,
4890 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4892 IX86_ATTR_NO ("align-stringops",
4893 OPT_mno_align_stringops,
4894 MASK_NO_ALIGN_STRINGOPS),
4896 IX86_ATTR_YES ("recip",
4897 OPT_mrecip,
4898 MASK_RECIP),
4902 /* If this is a list, recurse to get the options. */
4903 if (TREE_CODE (args) == TREE_LIST)
4905 bool ret = true;
4907 for (; args; args = TREE_CHAIN (args))
4908 if (TREE_VALUE (args)
4909 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4910 p_strings, opts, opts_set,
4911 enum_opts_set))
4912 ret = false;
4914 return ret;
4917 else if (TREE_CODE (args) != STRING_CST)
4919 error ("attribute %<target%> argument not a string");
4920 return false;
4923 /* Handle multiple arguments separated by commas. */
4924 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4926 while (next_optstr && *next_optstr != '\0')
4928 char *p = next_optstr;
4929 char *orig_p = p;
4930 char *comma = strchr (next_optstr, ',');
4931 const char *opt_string;
4932 size_t len, opt_len;
4933 int opt;
4934 bool opt_set_p;
4935 char ch;
4936 unsigned i;
4937 enum ix86_opt_type type = ix86_opt_unknown;
4938 int mask = 0;
4940 if (comma)
4942 *comma = '\0';
4943 len = comma - next_optstr;
4944 next_optstr = comma + 1;
4946 else
4948 len = strlen (p);
4949 next_optstr = NULL;
4952 /* Recognize no-xxx. */
4953 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4955 opt_set_p = false;
4956 p += 3;
4957 len -= 3;
4959 else
4960 opt_set_p = true;
4962 /* Find the option. */
4963 ch = *p;
4964 opt = N_OPTS;
4965 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4967 type = attrs[i].type;
4968 opt_len = attrs[i].len;
4969 if (ch == attrs[i].string[0]
4970 && ((type != ix86_opt_str && type != ix86_opt_enum)
4971 ? len == opt_len
4972 : len > opt_len)
4973 && memcmp (p, attrs[i].string, opt_len) == 0)
4975 opt = attrs[i].opt;
4976 mask = attrs[i].mask;
4977 opt_string = attrs[i].string;
4978 break;
4982 /* Process the option. */
4983 if (opt == N_OPTS)
4985 error ("attribute(target(\"%s\")) is unknown", orig_p);
4986 ret = false;
4989 else if (type == ix86_opt_isa)
4991 struct cl_decoded_option decoded;
4993 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4994 ix86_handle_option (opts, opts_set,
4995 &decoded, input_location);
4998 else if (type == ix86_opt_yes || type == ix86_opt_no)
5000 if (type == ix86_opt_no)
5001 opt_set_p = !opt_set_p;
5003 if (opt_set_p)
5004 opts->x_target_flags |= mask;
5005 else
5006 opts->x_target_flags &= ~mask;
5009 else if (type == ix86_opt_str)
5011 if (p_strings[opt])
5013 error ("option(\"%s\") was already specified", opt_string);
5014 ret = false;
5016 else
5017 p_strings[opt] = xstrdup (p + opt_len);
5020 else if (type == ix86_opt_enum)
5022 bool arg_ok;
5023 int value;
5025 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
5026 if (arg_ok)
5027 set_option (opts, enum_opts_set, opt, value,
5028 p + opt_len, DK_UNSPECIFIED, input_location,
5029 global_dc);
5030 else
5032 error ("attribute(target(\"%s\")) is unknown", orig_p);
5033 ret = false;
5037 else
5038 gcc_unreachable ();
5041 return ret;
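/* For reference, the strings processed by this function come from user code
   such as (an illustrative example, not from the sources)

     __attribute__ ((target ("avx2,no-sse4a,fpmath=sse,arch=haswell")))
     void hot_loop (float *dst, const float *src, int n);

   Each comma-separated token is looked up in the attrs[] table: a leading
   "no-" flips opt_set_p, ISA names are routed through ix86_handle_option,
   "arch=" and "tune=" are stored as strings, and "fpmath=" is handled as an
   enum option.  */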
5044 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
5046 tree
5047 ix86_valid_target_attribute_tree (tree args,
5048 struct gcc_options *opts,
5049 struct gcc_options *opts_set)
5051 const char *orig_arch_string = opts->x_ix86_arch_string;
5052 const char *orig_tune_string = opts->x_ix86_tune_string;
5053 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
5054 int orig_tune_defaulted = ix86_tune_defaulted;
5055 int orig_arch_specified = ix86_arch_specified;
5056 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
5057 tree t = NULL_TREE;
5058 int i;
5059 struct cl_target_option *def
5060 = TREE_TARGET_OPTION (target_option_default_node);
5061 struct gcc_options enum_opts_set;
5063 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5065 /* Process each of the options on the chain. */
5066 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5067 opts_set, &enum_opts_set))
5068 return error_mark_node;
5070 /* If the changed options are different from the default, rerun
5071 ix86_option_override_internal, and then save the options away.
5072 The string options are attribute options, and will be undone
5073 when we copy the save structure. */
5074 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5075 || opts->x_target_flags != def->x_target_flags
5076 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5077 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5078 || enum_opts_set.x_ix86_fpmath)
5080 /* If we are using the default tune= or arch=, undo the string assigned,
5081 and use the default. */
5082 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5083 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
5084 else if (!orig_arch_specified)
5085 opts->x_ix86_arch_string = NULL;
5087 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5088 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
5089 else if (orig_tune_defaulted)
5090 opts->x_ix86_tune_string = NULL;
5092 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5093 if (enum_opts_set.x_ix86_fpmath)
5094 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5095 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5096 && TARGET_SSE_P (opts->x_ix86_isa_flags))
5098 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5099 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5102 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5103 ix86_option_override_internal (false, opts, opts_set);
5105 /* Add any builtin functions with the new isa if any. */
5106 ix86_add_new_builtins (opts->x_ix86_isa_flags);
5108 /* Save the current options unless we are validating options for
5109 #pragma. */
5110 t = build_target_option_node (opts);
5112 opts->x_ix86_arch_string = orig_arch_string;
5113 opts->x_ix86_tune_string = orig_tune_string;
5114 opts_set->x_ix86_fpmath = orig_fpmath_set;
5116 /* Free up memory allocated to hold the strings */
5117 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5118 free (option_strings[i]);
5121 return t;
5124 /* Hook to validate attribute((target("string"))). */
5126 static bool
5127 ix86_valid_target_attribute_p (tree fndecl,
5128 tree ARG_UNUSED (name),
5129 tree args,
5130 int ARG_UNUSED (flags))
5132 struct gcc_options func_options;
5133 tree new_target, new_optimize;
5134 bool ret = true;
5136 /* attribute((target("default"))) does nothing, beyond
5137 affecting multi-versioning. */
5138 if (TREE_VALUE (args)
5139 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5140 && TREE_CHAIN (args) == NULL_TREE
5141 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5142 return true;
5144 tree old_optimize = build_optimization_node (&global_options);
5146 /* Get the optimization options of the current function. */
5147 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5149 if (!func_optimize)
5150 func_optimize = old_optimize;
5152 /* Init func_options. */
5153 memset (&func_options, 0, sizeof (func_options));
5154 init_options_struct (&func_options, NULL);
5155 lang_hooks.init_options_struct (&func_options);
5157 cl_optimization_restore (&func_options,
5158 TREE_OPTIMIZATION (func_optimize));
5160 /* Initialize func_options to the default before its target options can
5161 be set. */
5162 cl_target_option_restore (&func_options,
5163 TREE_TARGET_OPTION (target_option_default_node));
5165 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5166 &global_options_set);
5168 new_optimize = build_optimization_node (&func_options);
5170 if (new_target == error_mark_node)
5171 ret = false;
5173 else if (fndecl && new_target)
5175 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5177 if (old_optimize != new_optimize)
5178 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5181 return ret;
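/* A sketch of why "default" is special-cased above: with C++ function
   multi-versioning the same function can be defined several times, e.g.

     __attribute__ ((target ("default"))) int dot (void) { return 0; }
     __attribute__ ((target ("avx2")))    int dot (void) { return 2; }

   The "default" version must keep the command-line options untouched, so it
   is accepted here without building a new target option node.  */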
5185 /* Hook to determine if one function can safely inline another. */
5187 static bool
5188 ix86_can_inline_p (tree caller, tree callee)
5190 bool ret = false;
5191 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5192 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5194 /* If callee has no option attributes, then it is ok to inline. */
5195 if (!callee_tree)
5196 ret = true;
5198 /* If caller has no option attributes, but callee does, then it is not ok to
5199 inline. */
5200 else if (!caller_tree)
5201 ret = false;
5203 else
5205 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5206 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5208 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5209 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5210 function. */
5211 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5212 != callee_opts->x_ix86_isa_flags)
5213 ret = false;
5215 /* See if we have the same non-isa options. */
5216 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5217 ret = false;
5219 /* See if arch, tune, etc. are the same. */
5220 else if (caller_opts->arch != callee_opts->arch)
5221 ret = false;
5223 else if (caller_opts->tune != callee_opts->tune)
5224 ret = false;
5226 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5227 ret = false;
5229 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5230 ret = false;
5232 else
5233 ret = true;
5236 return ret;
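/* A concrete (hypothetical) example of the checks above:

     static int f2  (int x) __attribute__ ((target ("sse2")));
     static int f42 (int x) __attribute__ ((target ("sse4.2")));

   f42 may inline f2, because the callee's ISA flags are a subset of the
   caller's, but f2 must not inline f42; a mismatch in the other target
   flags, arch, tune, fpmath or branch cost likewise blocks inlining.  */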
5240 /* Remember the last target of ix86_set_current_function. */
5241 static GTY(()) tree ix86_previous_fndecl;
5243 /* Set targets globals to the default (or current #pragma GCC target
5244 if active). Invalidate ix86_previous_fndecl cache. */
5246 void
5247 ix86_reset_previous_fndecl (void)
5249 tree new_tree = target_option_current_node;
5250 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5251 if (TREE_TARGET_GLOBALS (new_tree))
5252 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5253 else if (new_tree == target_option_default_node)
5254 restore_target_globals (&default_target_globals);
5255 else
5256 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5257 ix86_previous_fndecl = NULL_TREE;
5260 /* Establish appropriate back-end context for processing the function
5261 FNDECL. The argument might be NULL to indicate processing at top
5262 level, outside of any function scope. */
5263 static void
5264 ix86_set_current_function (tree fndecl)
5266 /* Only change the context if the function changes. This hook is called
5267 several times in the course of compiling a function, and we don't want to
5268 slow things down too much or call target_reinit when it isn't safe. */
5269 if (fndecl == ix86_previous_fndecl)
5270 return;
5272 tree old_tree;
5273 if (ix86_previous_fndecl == NULL_TREE)
5274 old_tree = target_option_current_node;
5275 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5276 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5277 else
5278 old_tree = target_option_default_node;
5280 if (fndecl == NULL_TREE)
5282 if (old_tree != target_option_current_node)
5283 ix86_reset_previous_fndecl ();
5284 return;
5287 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5288 if (new_tree == NULL_TREE)
5289 new_tree = target_option_default_node;
5291 if (old_tree != new_tree)
5293 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5294 if (TREE_TARGET_GLOBALS (new_tree))
5295 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5296 else if (new_tree == target_option_default_node)
5297 restore_target_globals (&default_target_globals);
5298 else
5299 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5301 ix86_previous_fndecl = fndecl;
5305 /* Return true if this goes in large data/bss. */
5307 static bool
5308 ix86_in_large_data_p (tree exp)
5310 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5311 return false;
5313 /* Functions are never large data. */
5314 if (TREE_CODE (exp) == FUNCTION_DECL)
5315 return false;
5317 /* Automatic variables are never large data. */
5318 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5319 return false;
5321 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5323 const char *section = DECL_SECTION_NAME (exp);
5324 if (strcmp (section, ".ldata") == 0
5325 || strcmp (section, ".lbss") == 0)
5326 return true;
5327 return false;
5329 else
5331 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5333 /* If this is an incomplete type with size 0, then we can't put it
5334 in data because it might be too big when completed. Also,
5335 int_size_in_bytes returns -1 if the size can vary or is larger than
5336 an integer, in which case it is also safer to assume that it goes in
5337 large data. */
5338 if (size <= 0 || size > ix86_section_threshold)
5339 return true;
5342 return false;
5345 /* Switch to the appropriate section for output of DECL.
5346 DECL is either a `VAR_DECL' node or a constant of some sort.
5347 RELOC indicates whether forming the initial value of DECL requires
5348 link-time relocations. */
5350 ATTRIBUTE_UNUSED static section *
5351 x86_64_elf_select_section (tree decl, int reloc,
5352 unsigned HOST_WIDE_INT align)
5354 if (ix86_in_large_data_p (decl))
5356 const char *sname = NULL;
5357 unsigned int flags = SECTION_WRITE;
5358 switch (categorize_decl_for_section (decl, reloc))
5360 case SECCAT_DATA:
5361 sname = ".ldata";
5362 break;
5363 case SECCAT_DATA_REL:
5364 sname = ".ldata.rel";
5365 break;
5366 case SECCAT_DATA_REL_LOCAL:
5367 sname = ".ldata.rel.local";
5368 break;
5369 case SECCAT_DATA_REL_RO:
5370 sname = ".ldata.rel.ro";
5371 break;
5372 case SECCAT_DATA_REL_RO_LOCAL:
5373 sname = ".ldata.rel.ro.local";
5374 break;
5375 case SECCAT_BSS:
5376 sname = ".lbss";
5377 flags |= SECTION_BSS;
5378 break;
5379 case SECCAT_RODATA:
5380 case SECCAT_RODATA_MERGE_STR:
5381 case SECCAT_RODATA_MERGE_STR_INIT:
5382 case SECCAT_RODATA_MERGE_CONST:
5383 sname = ".lrodata";
5384 flags = 0;
5385 break;
5386 case SECCAT_SRODATA:
5387 case SECCAT_SDATA:
5388 case SECCAT_SBSS:
5389 gcc_unreachable ();
5390 case SECCAT_TEXT:
5391 case SECCAT_TDATA:
5392 case SECCAT_TBSS:
5393 /* We don't split these for the medium model. Place them into
5394 default sections and hope for the best. */
5395 break;
5397 if (sname)
5399 /* We might get called with string constants, but get_named_section
5400 doesn't like them as they are not DECLs. Also, we need to set
5401 flags in that case. */
5402 if (!DECL_P (decl))
5403 return get_section (sname, flags, NULL);
5404 return get_named_section (decl, sname, reloc);
5407 return default_elf_select_section (decl, reloc, align);
5410 /* Select a set of attributes for section NAME based on the properties
5411 of DECL and whether or not RELOC indicates that DECL's initializer
5412 might contain runtime relocations. */
5414 static unsigned int ATTRIBUTE_UNUSED
5415 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5417 unsigned int flags = default_section_type_flags (decl, name, reloc);
5419 if (decl == NULL_TREE
5420 && (strcmp (name, ".ldata.rel.ro") == 0
5421 || strcmp (name, ".ldata.rel.ro.local") == 0))
5422 flags |= SECTION_RELRO;
5424 if (strcmp (name, ".lbss") == 0
5425 || strncmp (name, ".lbss.", 6) == 0
5426 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5427 flags |= SECTION_BSS;
5429 return flags;
5432 /* Build up a unique section name, expressed as a
5433 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5434 RELOC indicates whether the initial value of EXP requires
5435 link-time relocations. */
5437 static void ATTRIBUTE_UNUSED
5438 x86_64_elf_unique_section (tree decl, int reloc)
5440 if (ix86_in_large_data_p (decl))
5442 const char *prefix = NULL;
5443 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5444 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5446 switch (categorize_decl_for_section (decl, reloc))
5448 case SECCAT_DATA:
5449 case SECCAT_DATA_REL:
5450 case SECCAT_DATA_REL_LOCAL:
5451 case SECCAT_DATA_REL_RO:
5452 case SECCAT_DATA_REL_RO_LOCAL:
5453 prefix = one_only ? ".ld" : ".ldata";
5454 break;
5455 case SECCAT_BSS:
5456 prefix = one_only ? ".lb" : ".lbss";
5457 break;
5458 case SECCAT_RODATA:
5459 case SECCAT_RODATA_MERGE_STR:
5460 case SECCAT_RODATA_MERGE_STR_INIT:
5461 case SECCAT_RODATA_MERGE_CONST:
5462 prefix = one_only ? ".lr" : ".lrodata";
5463 break;
5464 case SECCAT_SRODATA:
5465 case SECCAT_SDATA:
5466 case SECCAT_SBSS:
5467 gcc_unreachable ();
5468 case SECCAT_TEXT:
5469 case SECCAT_TDATA:
5470 case SECCAT_TBSS:
5471 /* We don't split these for the medium model. Place them into
5472 default sections and hope for the best. */
5473 break;
5475 if (prefix)
5477 const char *name, *linkonce;
5478 char *string;
5480 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5481 name = targetm.strip_name_encoding (name);
5483 /* If we're using one_only, then there needs to be a .gnu.linkonce
5484 prefix to the section name. */
5485 linkonce = one_only ? ".gnu.linkonce" : "";
5487 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5489 set_decl_section_name (decl, string);
5490 return;
5493 default_unique_section (decl, reloc);
5496 #ifdef COMMON_ASM_OP
5497 /* This says how to output assembler code to declare an
5498 uninitialized external linkage data object.
5500 For medium-model x86-64 we need to use the .largecomm directive for
5501 large objects. */
5502 void
5503 x86_elf_aligned_common (FILE *file,
5504 const char *name, unsigned HOST_WIDE_INT size,
5505 int align)
5507 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5508 && size > (unsigned int)ix86_section_threshold)
5509 fputs ("\t.largecomm\t", file);
5510 else
5511 fputs (COMMON_ASM_OP, file);
5512 assemble_name (file, name);
5513 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5514 size, align / BITS_PER_UNIT);
5516 #endif
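/* Illustration (hypothetical object, exact alignment depends on
   DATA_ALIGNMENT): with -mcmodel=medium and the default section threshold,
   a tentative definition compiled as a common symbol (-fcommon), such as

     char big_buffer[1 << 20];

   exceeds ix86_section_threshold, so the code above emits roughly

     .largecomm	big_buffer,1048576,32

   while smaller commons keep using COMMON_ASM_OP (".comm" on ELF).  */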
5518 /* Utility function for targets to use in implementing
5519 ASM_OUTPUT_ALIGNED_BSS. */
5521 void
5522 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5523 unsigned HOST_WIDE_INT size, int align)
5525 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5526 && size > (unsigned int)ix86_section_threshold)
5527 switch_to_section (get_named_section (decl, ".lbss", 0));
5528 else
5529 switch_to_section (bss_section);
5530 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5531 #ifdef ASM_DECLARE_OBJECT_NAME
5532 last_assemble_variable_decl = decl;
5533 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5534 #else
5535 /* Standard thing is to just output a label for the object. */
5536 ASM_OUTPUT_LABEL (file, name);
5537 #endif /* ASM_DECLARE_OBJECT_NAME */
5538 ASM_OUTPUT_SKIP (file, size ? size : 1);
5541 /* Decide whether we must probe the stack before any space allocation
5542 on this target. It's essentially TARGET_STACK_PROBE except when
5543 -fstack-check causes the stack to be already probed differently. */
5545 bool
5546 ix86_target_stack_probe (void)
5548 /* Do not probe the stack twice if static stack checking is enabled. */
5549 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5550 return false;
5552 return TARGET_STACK_PROBE;
5555 /* Decide whether we can make a sibling call to a function. DECL is the
5556 declaration of the function being targeted by the call and EXP is the
5557 CALL_EXPR representing the call. */
5559 static bool
5560 ix86_function_ok_for_sibcall (tree decl, tree exp)
5562 tree type, decl_or_type;
5563 rtx a, b;
5565 /* If we are generating position-independent code, we cannot sibcall
5566 optimize direct calls to global functions, as the PLT requires
5567 %ebx be live. (Darwin does not have a PLT.) */
5568 if (!TARGET_MACHO
5569 && !TARGET_64BIT
5570 && flag_pic
5571 && flag_plt
5572 && decl && !targetm.binds_local_p (decl))
5573 return false;
5575 /* If we need to align the outgoing stack, then sibcalling would
5576 unalign the stack, which may break the called function. */
5577 if (ix86_minimum_incoming_stack_boundary (true)
5578 < PREFERRED_STACK_BOUNDARY)
5579 return false;
5581 if (decl)
5583 decl_or_type = decl;
5584 type = TREE_TYPE (decl);
5586 else
5588 /* We're looking at the CALL_EXPR, we need the type of the function. */
5589 type = CALL_EXPR_FN (exp); /* pointer expression */
5590 type = TREE_TYPE (type); /* pointer type */
5591 type = TREE_TYPE (type); /* function type */
5592 decl_or_type = type;
5595 /* Check that the return value locations are the same. Like
5596 if we are returning floats on the 80387 register stack, we cannot
5597 make a sibcall from a function that doesn't return a float to a
5598 function that does or, conversely, from a function that does return
5599 a float to a function that doesn't; the necessary stack adjustment
5600 would not be executed. This is also the place we notice
5601 differences in the return value ABI. Note that it is ok for one
5602 of the functions to have void return type as long as the return
5603 value of the other is passed in a register. */
5604 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5605 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5606 cfun->decl, false);
5607 if (STACK_REG_P (a) || STACK_REG_P (b))
5609 if (!rtx_equal_p (a, b))
5610 return false;
5612 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5614 else if (!rtx_equal_p (a, b))
5615 return false;
5617 if (TARGET_64BIT)
5619 /* The SYSV ABI has more call-clobbered registers;
5620 disallow sibcalls from MS to SYSV. */
5621 if (cfun->machine->call_abi == MS_ABI
5622 && ix86_function_type_abi (type) == SYSV_ABI)
5623 return false;
5625 else
5627 /* If this call is indirect, we'll need to be able to use a
5628 call-clobbered register for the address of the target function.
5629 Make sure that all such registers are not used for passing
5630 parameters. Note that DLLIMPORT functions are indirect. */
5631 if (!decl
5632 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5634 /* Check if regparm >= 3 since arg_reg_available is set to
5635 false if regparm == 0. If regparm is 1 or 2, there is
5636 always a call-clobbered register available.
5638 ??? The symbol indirect call doesn't need a call-clobbered
5639 register. But we don't know if this is a symbol indirect
5640 call or not here. */
5641 if (ix86_function_regparm (type, NULL) >= 3
5642 && !cfun->machine->arg_reg_available)
5643 return false;
5647 /* Otherwise okay. That also includes certain types of indirect calls. */
5648 return true;
5651 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5652 and "sseregparm" calling convention attributes;
5653 arguments as in struct attribute_spec.handler. */
5655 static tree
5656 ix86_handle_cconv_attribute (tree *node, tree name,
5657 tree args,
5658 int,
5659 bool *no_add_attrs)
5661 if (TREE_CODE (*node) != FUNCTION_TYPE
5662 && TREE_CODE (*node) != METHOD_TYPE
5663 && TREE_CODE (*node) != FIELD_DECL
5664 && TREE_CODE (*node) != TYPE_DECL)
5666 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5667 name);
5668 *no_add_attrs = true;
5669 return NULL_TREE;
5672 /* Can combine regparm with all attributes but fastcall and thiscall. */
5673 if (is_attribute_p ("regparm", name))
5675 tree cst;
5677 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5679 error ("fastcall and regparm attributes are not compatible");
5682 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5684 error ("regparm and thiscall attributes are not compatible");
5687 cst = TREE_VALUE (args);
5688 if (TREE_CODE (cst) != INTEGER_CST)
5690 warning (OPT_Wattributes,
5691 "%qE attribute requires an integer constant argument",
5692 name);
5693 *no_add_attrs = true;
5695 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5697 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5698 name, REGPARM_MAX);
5699 *no_add_attrs = true;
5702 return NULL_TREE;
5705 if (TARGET_64BIT)
5707 /* Do not warn when emulating the MS ABI. */
5708 if ((TREE_CODE (*node) != FUNCTION_TYPE
5709 && TREE_CODE (*node) != METHOD_TYPE)
5710 || ix86_function_type_abi (*node) != MS_ABI)
5711 warning (OPT_Wattributes, "%qE attribute ignored",
5712 name);
5713 *no_add_attrs = true;
5714 return NULL_TREE;
5717 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5718 if (is_attribute_p ("fastcall", name))
5720 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5722 error ("fastcall and cdecl attributes are not compatible");
5724 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5726 error ("fastcall and stdcall attributes are not compatible");
5728 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5730 error ("fastcall and regparm attributes are not compatible");
5732 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5734 error ("fastcall and thiscall attributes are not compatible");
5738 /* Can combine stdcall with fastcall (redundant), regparm and
5739 sseregparm. */
5740 else if (is_attribute_p ("stdcall", name))
5742 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5744 error ("stdcall and cdecl attributes are not compatible");
5746 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5748 error ("stdcall and fastcall attributes are not compatible");
5750 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5752 error ("stdcall and thiscall attributes are not compatible");
5756 /* Can combine cdecl with regparm and sseregparm. */
5757 else if (is_attribute_p ("cdecl", name))
5759 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5761 error ("stdcall and cdecl attributes are not compatible");
5763 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5765 error ("fastcall and cdecl attributes are not compatible");
5767 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5769 error ("cdecl and thiscall attributes are not compatible");
5772 else if (is_attribute_p ("thiscall", name))
5774 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5775 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5776 name);
5777 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5779 error ("stdcall and thiscall attributes are not compatible");
5781 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5783 error ("fastcall and thiscall attributes are not compatible");
5785 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5787 error ("cdecl and thiscall attributes are not compatible");
5791 /* Can combine sseregparm with all attributes. */
5793 return NULL_TREE;
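/* Hypothetical 32-bit examples of what the handler above accepts and
   rejects:

     void f (int, int) __attribute__ ((fastcall, cdecl));

   is rejected ("fastcall and cdecl attributes are not compatible"), while

     void g (int, int) __attribute__ ((regparm (2)));

   is accepted because the argument is an integer constant no larger than
   REGPARM_MAX.  On 64-bit targets these attributes are ignored, with a
   warning unless the function uses the MS ABI, as coded above.  */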
5796 /* The transactional memory builtins are implicitly regparm or fastcall
5797 depending on the ABI. Override the generic do-nothing attribute that
5798 these builtins were declared with, and replace it with one of the two
5799 attributes that we expect elsewhere. */
5801 static tree
5802 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5803 int flags, bool *no_add_attrs)
5805 tree alt;
5807 /* In no case do we want to add the placeholder attribute. */
5808 *no_add_attrs = true;
5810 /* The 64-bit ABI is unchanged for transactional memory. */
5811 if (TARGET_64BIT)
5812 return NULL_TREE;
5814 /* ??? Is there a better way to validate 32-bit Windows? We have
5815 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5816 if (CHECK_STACK_LIMIT > 0)
5817 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5818 else
5820 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5821 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5823 decl_attributes (node, alt, flags);
5825 return NULL_TREE;
5828 /* This function determines from TYPE the calling-convention. */
5830 unsigned int
5831 ix86_get_callcvt (const_tree type)
5833 unsigned int ret = 0;
5834 bool is_stdarg;
5835 tree attrs;
5837 if (TARGET_64BIT)
5838 return IX86_CALLCVT_CDECL;
5840 attrs = TYPE_ATTRIBUTES (type);
5841 if (attrs != NULL_TREE)
5843 if (lookup_attribute ("cdecl", attrs))
5844 ret |= IX86_CALLCVT_CDECL;
5845 else if (lookup_attribute ("stdcall", attrs))
5846 ret |= IX86_CALLCVT_STDCALL;
5847 else if (lookup_attribute ("fastcall", attrs))
5848 ret |= IX86_CALLCVT_FASTCALL;
5849 else if (lookup_attribute ("thiscall", attrs))
5850 ret |= IX86_CALLCVT_THISCALL;
5852 /* Regparm isn't allowed for thiscall and fastcall. */
5853 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5855 if (lookup_attribute ("regparm", attrs))
5856 ret |= IX86_CALLCVT_REGPARM;
5857 if (lookup_attribute ("sseregparm", attrs))
5858 ret |= IX86_CALLCVT_SSEREGPARM;
5861 if (IX86_BASE_CALLCVT(ret) != 0)
5862 return ret;
5865 is_stdarg = stdarg_p (type);
5866 if (TARGET_RTD && !is_stdarg)
5867 return IX86_CALLCVT_STDCALL | ret;
5869 if (ret != 0
5870 || is_stdarg
5871 || TREE_CODE (type) != METHOD_TYPE
5872 || ix86_function_type_abi (type) != MS_ABI)
5873 return IX86_CALLCVT_CDECL | ret;
5875 return IX86_CALLCVT_THISCALL;
5878 /* Return 0 if the attributes for two types are incompatible, 1 if they
5879 are compatible, and 2 if they are nearly compatible (which causes a
5880 warning to be generated). */
5882 static int
5883 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5885 unsigned int ccvt1, ccvt2;
5887 if (TREE_CODE (type1) != FUNCTION_TYPE
5888 && TREE_CODE (type1) != METHOD_TYPE)
5889 return 1;
5891 ccvt1 = ix86_get_callcvt (type1);
5892 ccvt2 = ix86_get_callcvt (type2);
5893 if (ccvt1 != ccvt2)
5894 return 0;
5895 if (ix86_function_regparm (type1, NULL)
5896 != ix86_function_regparm (type2, NULL))
5897 return 0;
5899 return 1;
5902 /* Return the regparm value for a function with the indicated TYPE and DECL.
5903 DECL may be NULL when calling function indirectly
5904 or considering a libcall. */
5906 static int
5907 ix86_function_regparm (const_tree type, const_tree decl)
5909 tree attr;
5910 int regparm;
5911 unsigned int ccvt;
5913 if (TARGET_64BIT)
5914 return (ix86_function_type_abi (type) == SYSV_ABI
5915 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5916 ccvt = ix86_get_callcvt (type);
5917 regparm = ix86_regparm;
5919 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5921 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5922 if (attr)
5924 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5925 return regparm;
5928 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5929 return 2;
5930 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5931 return 1;
5933 /* Use register calling convention for local functions when possible. */
5934 if (decl
5935 && TREE_CODE (decl) == FUNCTION_DECL)
5937 cgraph_node *target = cgraph_node::get (decl);
5938 if (target)
5939 target = target->function_symbol ();
5941 /* Caller and callee must agree on the calling convention, so
5942 checking just the optimize flag here would mean that with
5943 __attribute__((optimize (...))) the caller could use the regparm convention
5944 and the callee not, or vice versa. Instead look at whether the callee
5945 is optimized or not. */
5946 if (target && opt_for_fn (target->decl, optimize)
5947 && !(profile_flag && !flag_fentry))
5949 cgraph_local_info *i = &target->local;
5950 if (i && i->local && i->can_change_signature)
5952 int local_regparm, globals = 0, regno;
5954 /* Make sure no regparm register is taken by a
5955 fixed register variable. */
5956 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5957 local_regparm++)
5958 if (fixed_regs[local_regparm])
5959 break;
5961 /* We don't want to use regparm(3) for nested functions as
5962 these use a static chain pointer in the third argument. */
5963 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5964 local_regparm = 2;
5966 /* Save a register for the split stack. */
5967 if (local_regparm == 3 && flag_split_stack)
5968 local_regparm = 2;
5970 /* Each fixed register usage increases register pressure,
5971 so fewer registers should be used for argument passing.
5972 This functionality can be overridden by an explicit
5973 regparm value. */
5974 for (regno = AX_REG; regno <= DI_REG; regno++)
5975 if (fixed_regs[regno])
5976 globals++;
5978 local_regparm
5979 = globals < local_regparm ? local_regparm - globals : 0;
5981 if (local_regparm > regparm)
5982 regparm = local_regparm;
5987 return regparm;
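/* A short usage sketch for the register-passing logic above (32-bit only):

     __attribute__ ((regparm (3))) int mul3 (int a, int b, int c);

   requests up to three integer arguments in registers (EAX, EDX, ECX in the
   usual i386 convention); fastcall implies two and thiscall one.  For local
   optimized functions the loop above picks a count automatically, reduced
   by fixed registers, nested-function static chains and -fsplit-stack.  */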
5990 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5991 DFmode (2) arguments in SSE registers for a function with the
5992 indicated TYPE and DECL. DECL may be NULL when calling a function
5993 indirectly or considering a libcall. Return -1 if any FP parameter
5994 should be rejected by error. This is used in situations where we imply the
5995 SSE calling convention but the function is called from another function
5996 with SSE disabled. Otherwise return 0. */
5998 static int
5999 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
6001 gcc_assert (!TARGET_64BIT);
6003 /* Use SSE registers to pass SFmode and DFmode arguments if requested
6004 by the sseregparm attribute. */
6005 if (TARGET_SSEREGPARM
6006 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
6008 if (!TARGET_SSE)
6010 if (warn)
6012 if (decl)
6013 error ("calling %qD with attribute sseregparm without "
6014 "SSE/SSE2 enabled", decl);
6015 else
6016 error ("calling %qT with attribute sseregparm without "
6017 "SSE/SSE2 enabled", type);
6019 return 0;
6022 return 2;
6025 if (!decl)
6026 return 0;
6028 cgraph_node *target = cgraph_node::get (decl);
6029 if (target)
6030 target = target->function_symbol ();
6032 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
6033 (and DFmode for SSE2) arguments in SSE registers. */
6034 if (target
6035 /* TARGET_SSE_MATH */
6036 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
6037 && opt_for_fn (target->decl, optimize)
6038 && !(profile_flag && !flag_fentry))
6040 cgraph_local_info *i = &target->local;
6041 if (i && i->local && i->can_change_signature)
6043 /* Refuse to produce wrong code when a local function with SSE enabled
6044 is called from an SSE-disabled function.
6045 FIXME: We need a way to detect these cases across ltrans partitions
6046 and avoid using SSE calling conventions on local functions called
6047 from functions with SSE disabled. For now at least delay the
6048 warning until we know we are going to produce wrong code.
6049 See PR66047. */
6050 if (!TARGET_SSE && warn)
6051 return -1;
6052 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
6053 ->x_ix86_isa_flags) ? 2 : 1;
6057 return 0;
6060 /* Return true if EAX is live at the start of the function. Used by
6061 ix86_expand_prologue to determine if we need special help before
6062 calling allocate_stack_worker. */
6064 static bool
6065 ix86_eax_live_at_start_p (void)
6067 /* Cheat. Don't bother working forward from ix86_function_regparm
6068 to the function type to whether an actual argument is located in
6069 eax. Instead just look at cfg info, which is still close enough
6070 to correct at this point. This gives false positives for broken
6071 functions that might use uninitialized data that happens to be
6072 allocated in eax, but who cares? */
6073 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6076 static bool
6077 ix86_keep_aggregate_return_pointer (tree fntype)
6079 tree attr;
6081 if (!TARGET_64BIT)
6083 attr = lookup_attribute ("callee_pop_aggregate_return",
6084 TYPE_ATTRIBUTES (fntype));
6085 if (attr)
6086 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6088 /* For 32-bit MS-ABI the default is to keep aggregate
6089 return pointer. */
6090 if (ix86_function_type_abi (fntype) == MS_ABI)
6091 return true;
6093 return KEEP_AGGREGATE_RETURN_POINTER != 0;
6096 /* Value is the number of bytes of arguments automatically
6097 popped when returning from a subroutine call.
6098 FUNDECL is the declaration node of the function (as a tree),
6099 FUNTYPE is the data type of the function (as a tree),
6100 or for a library call it is an identifier node for the subroutine name.
6101 SIZE is the number of bytes of arguments passed on the stack.
6103 On the 80386, the RTD insn may be used to pop them if the number
6104 of args is fixed, but if the number is variable then the caller
6105 must pop them all. RTD can't be used for library calls now
6106 because the library is compiled with the Unix compiler.
6107 Use of RTD is a selectable option, since it is incompatible with
6108 standard Unix calling sequences. If the option is not selected,
6109 the caller must always pop the args.
6111 The attribute stdcall is equivalent to RTD on a per module basis. */
6113 static int
6114 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6116 unsigned int ccvt;
6118 /* None of the 64-bit ABIs pop arguments. */
6119 if (TARGET_64BIT)
6120 return 0;
6122 ccvt = ix86_get_callcvt (funtype);
6124 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6125 | IX86_CALLCVT_THISCALL)) != 0
6126 && ! stdarg_p (funtype))
6127 return size;
6129 /* Lose any fake structure return argument if it is passed on the stack. */
6130 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6131 && !ix86_keep_aggregate_return_pointer (funtype))
6133 int nregs = ix86_function_regparm (funtype, fundecl);
6134 if (nregs == 0)
6135 return GET_MODE_SIZE (Pmode);
6138 return 0;
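/* For example, under the rules above a 32-bit function declared as

     __attribute__ ((stdcall)) void cb (int a, int b);

   pops its own 8 bytes of stack arguments, so its epilogue ends in
   "ret $8", whereas the default cdecl convention returns with a plain
   "ret" and leaves the popping to the caller.  Variadic functions never
   pop, as checked with stdarg_p above.  */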
6141 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6143 static bool
6144 ix86_legitimate_combined_insn (rtx_insn *insn)
6146 /* Check operand constraints in case hard registers were propagated
6147 into insn pattern. This check prevents combine pass from
6148 generating insn patterns with invalid hard register operands.
6149 These invalid insns can eventually confuse reload to error out
6150 with a spill failure. See also PRs 46829 and 46843. */
6151 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6153 int i;
6155 extract_insn (insn);
6156 preprocess_constraints (insn);
6158 int n_operands = recog_data.n_operands;
6159 int n_alternatives = recog_data.n_alternatives;
6160 for (i = 0; i < n_operands; i++)
6162 rtx op = recog_data.operand[i];
6163 machine_mode mode = GET_MODE (op);
6164 const operand_alternative *op_alt;
6165 int offset = 0;
6166 bool win;
6167 int j;
6169 /* For pre-AVX disallow unaligned loads/stores where the
6170 instructions don't support it. */
6171 if (!TARGET_AVX
6172 && VECTOR_MODE_P (GET_MODE (op))
6173 && misaligned_operand (op, GET_MODE (op)))
6175 int min_align = get_attr_ssememalign (insn);
6176 if (min_align == 0)
6177 return false;
6180 /* A unary operator may be accepted by the predicate, but it
6181 is irrelevant for matching constraints. */
6182 if (UNARY_P (op))
6183 op = XEXP (op, 0);
6185 if (SUBREG_P (op))
6187 if (REG_P (SUBREG_REG (op))
6188 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6189 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6190 GET_MODE (SUBREG_REG (op)),
6191 SUBREG_BYTE (op),
6192 GET_MODE (op));
6193 op = SUBREG_REG (op);
6196 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6197 continue;
6199 op_alt = recog_op_alt;
6201 /* Operand has no constraints, anything is OK. */
6202 win = !n_alternatives;
6204 alternative_mask preferred = get_preferred_alternatives (insn);
6205 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6207 if (!TEST_BIT (preferred, j))
6208 continue;
6209 if (op_alt[i].anything_ok
6210 || (op_alt[i].matches != -1
6211 && operands_match_p
6212 (recog_data.operand[i],
6213 recog_data.operand[op_alt[i].matches]))
6214 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6216 win = true;
6217 break;
6221 if (!win)
6222 return false;
6226 return true;
6229 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6231 static unsigned HOST_WIDE_INT
6232 ix86_asan_shadow_offset (void)
6234 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6235 : HOST_WIDE_INT_C (0x7fff8000))
6236 : (HOST_WIDE_INT_1 << 29);
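/* A sketch of how this offset is used, assuming the generic ASan mapping
   shadow = (addr >> ASAN_SHADOW_SHIFT) + offset with ASAN_SHADOW_SHIFT == 3:
   on 64-bit Linux (LP64) the shadow byte for ADDR lives at
   (ADDR >> 3) + 0x7fff8000, on LP64 Mach-O at (ADDR >> 3) + (1 << 44), and
   on 32-bit targets at (ADDR >> 3) + (1 << 29).  */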
6239 /* Argument support functions. */
6241 /* Return true when register may be used to pass function parameters. */
6242 bool
6243 ix86_function_arg_regno_p (int regno)
6245 int i;
6246 enum calling_abi call_abi;
6247 const int *parm_regs;
6249 if (TARGET_MPX && BND_REGNO_P (regno))
6250 return true;
6252 if (!TARGET_64BIT)
6254 if (TARGET_MACHO)
6255 return (regno < REGPARM_MAX
6256 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6257 else
6258 return (regno < REGPARM_MAX
6259 || (TARGET_MMX && MMX_REGNO_P (regno)
6260 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6261 || (TARGET_SSE && SSE_REGNO_P (regno)
6262 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6265 if (TARGET_SSE && SSE_REGNO_P (regno)
6266 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6267 return true;
6269 /* TODO: The function should depend on current function ABI but
6270 builtins.c would need updating then. Therefore we use the
6271 default ABI. */
6272 call_abi = ix86_cfun_abi ();
6274 /* RAX is used as hidden argument to va_arg functions. */
6275 if (call_abi == SYSV_ABI && regno == AX_REG)
6276 return true;
6278 if (call_abi == MS_ABI)
6279 parm_regs = x86_64_ms_abi_int_parameter_registers;
6280 else
6281 parm_regs = x86_64_int_parameter_registers;
6283 for (i = 0; i < (call_abi == MS_ABI
6284 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6285 if (regno == parm_regs[i])
6286 return true;
6287 return false;
6290 /* Return if we do not know how to pass TYPE solely in registers. */
6292 static bool
6293 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6295 if (must_pass_in_stack_var_size_or_pad (mode, type))
6296 return true;
6298 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6299 The layout_type routine is crafty and tries to trick us into passing
6300 currently unsupported vector types on the stack by using TImode. */
6301 return (!TARGET_64BIT && mode == TImode
6302 && type && TREE_CODE (type) != VECTOR_TYPE);
6305 /* Return the size, in bytes, of the area reserved for arguments passed
6306 in registers for the function represented by FNDECL, depending on the
6307 ABI used. */
6308 int
6309 ix86_reg_parm_stack_space (const_tree fndecl)
6311 enum calling_abi call_abi = SYSV_ABI;
6312 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6313 call_abi = ix86_function_abi (fndecl);
6314 else
6315 call_abi = ix86_function_type_abi (fndecl);
6316 if (TARGET_64BIT && call_abi == MS_ABI)
6317 return 32;
6318 return 0;
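/* A minimal illustrative sketch, not part of the back end: the 32 bytes
   returned above for the 64-bit MS ABI are the four 8-byte "home" slots the
   caller reserves for RCX, RDX, R8 and R9; the SysV ABI reserves nothing.
   The enumerators are placeholders for illustration only.  */
enum
{
  EXAMPLE_MS_X64_HOME_SLOTS = 4,
  EXAMPLE_MS_X64_HOME_AREA_BYTES = EXAMPLE_MS_X64_HOME_SLOTS * 8  /* 32 */
};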
6321 /* We add this as a workaround so that the libc_has_function
6322 hook can be used from i386.md. */
6323 bool
6324 ix86_libc_has_function (enum function_class fn_class)
6326 return targetm.libc_has_function (fn_class);
6329 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
6330 specifying the call ABI used. */
6331 enum calling_abi
6332 ix86_function_type_abi (const_tree fntype)
6334 enum calling_abi abi = ix86_abi;
6336 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
6337 return abi;
6339 if (abi == SYSV_ABI
6340 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6342 if (TARGET_X32)
6343 error ("X32 does not support ms_abi attribute");
6345 abi = MS_ABI;
6347 else if (abi == MS_ABI
6348 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6349 abi = SYSV_ABI;
6351 return abi;
6354 static enum calling_abi
6355 ix86_function_abi (const_tree fndecl)
6357 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
6360 /* Return SYSV_ABI or MS_ABI, depending on cfun,
6361 specifying the call ABI used. */
6362 enum calling_abi
6363 ix86_cfun_abi (void)
6365 return cfun ? cfun->machine->call_abi : ix86_abi;
6368 static bool
6369 ix86_function_ms_hook_prologue (const_tree fn)
6371 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6373 if (decl_function_context (fn) != NULL_TREE)
6374 error_at (DECL_SOURCE_LOCATION (fn),
6375 "ms_hook_prologue is not compatible with nested function");
6376 else
6377 return true;
6379 return false;
6382 /* Write the extra assembler code needed to declare a function properly. */
6384 void
6385 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6386 tree decl)
6388 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6390 if (is_ms_hook)
6392 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6393 unsigned int filler_cc = 0xcccccccc;
6395 for (i = 0; i < filler_count; i += 4)
6396 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6399 #ifdef SUBTARGET_ASM_UNWIND_INIT
6400 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6401 #endif
6403 ASM_OUTPUT_LABEL (asm_out_file, fname);
6405 /* Output magic byte marker, if hot-patch attribute is set. */
6406 if (is_ms_hook)
6408 if (TARGET_64BIT)
6410 /* leaq [%rsp + 0], %rsp */
6411 asm_fprintf (asm_out_file, ASM_BYTE
6412 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6414 else
6416 /* movl.s %edi, %edi
6417 push %ebp
6418 movl.s %esp, %ebp */
6419 asm_fprintf (asm_out_file, ASM_BYTE
6420 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6425 /* regclass.c */
6426 extern void init_regs (void);
6428 /* Implementation of the call ABI switching target hook. The call
6429 register sets specific to FNDECL are selected. See also
6430 ix86_conditional_register_usage for more details. */
6431 void
6432 ix86_call_abi_override (const_tree fndecl)
6434 cfun->machine->call_abi = ix86_function_abi (fndecl);
6437 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6438 Avoid the expensive re-initialization done by init_regs each time we switch
6439 function context, since it is needed only during RTL expansion. */
6440 static void
6441 ix86_maybe_switch_abi (void)
6443 if (TARGET_64BIT &&
6444 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6445 reinit_regs ();
6448 /* Return true if a pseudo register should be created and used to hold
6449 the GOT address for PIC code. */
6450 bool
6451 ix86_use_pseudo_pic_reg (void)
6453 if ((TARGET_64BIT
6454 && (ix86_cmodel == CM_SMALL_PIC
6455 || TARGET_PECOFF))
6456 || !flag_pic)
6457 return false;
6458 return true;
6461 /* Initialize large model PIC register. */
6463 static void
6464 ix86_init_large_pic_reg (unsigned int tmp_regno)
6466 rtx_code_label *label;
6467 rtx tmp_reg;
6469 gcc_assert (Pmode == DImode);
6470 label = gen_label_rtx ();
6471 emit_label (label);
6472 LABEL_PRESERVE_P (label) = 1;
6473 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6474 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6475 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6476 label));
6477 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6478 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6479 pic_offset_table_rtx, tmp_reg));
6482 /* Create and initialize PIC register if required. */
6483 static void
6484 ix86_init_pic_reg (void)
6486 edge entry_edge;
6487 rtx_insn *seq;
6489 if (!ix86_use_pseudo_pic_reg ())
6490 return;
6492 start_sequence ();
6494 if (TARGET_64BIT)
6496 if (ix86_cmodel == CM_LARGE_PIC)
6497 ix86_init_large_pic_reg (R11_REG);
6498 else
6499 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6501 else
6503 /* If there is a future mcount call in the function, it is more profitable
6504 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6505 rtx reg = crtl->profile
6506 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6507 : pic_offset_table_rtx;
6508 rtx_insn *insn = emit_insn (gen_set_got (reg));
6509 RTX_FRAME_RELATED_P (insn) = 1;
6510 if (crtl->profile)
6511 emit_move_insn (pic_offset_table_rtx, reg);
6512 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6515 seq = get_insns ();
6516 end_sequence ();
6518 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6519 insert_insn_on_edge (seq, entry_edge);
6520 commit_one_edge_insertion (entry_edge);
6523 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6524 for a call to a function whose data type is FNTYPE.
6525 For a library call, FNTYPE is 0. */
6527 void
6528 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6529 tree fntype, /* tree ptr for function type */
6530 rtx libname, /* SYMBOL_REF of library name or 0 */
6531 tree fndecl,
6532 int caller)
6534 struct cgraph_local_info *i = NULL;
6535 struct cgraph_node *target = NULL;
6537 memset (cum, 0, sizeof (*cum));
6539 if (fndecl)
6541 target = cgraph_node::get (fndecl);
6542 if (target)
6544 target = target->function_symbol ();
6545 i = cgraph_node::local_info (target->decl);
6546 cum->call_abi = ix86_function_abi (target->decl);
6548 else
6549 cum->call_abi = ix86_function_abi (fndecl);
6551 else
6552 cum->call_abi = ix86_function_type_abi (fntype);
6554 cum->caller = caller;
6556 /* Set up the number of registers to use for passing arguments. */
6557 cum->nregs = ix86_regparm;
6558 if (TARGET_64BIT)
6560 cum->nregs = (cum->call_abi == SYSV_ABI
6561 ? X86_64_REGPARM_MAX
6562 : X86_64_MS_REGPARM_MAX);
6564 if (TARGET_SSE)
6566 cum->sse_nregs = SSE_REGPARM_MAX;
6567 if (TARGET_64BIT)
6569 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6570 ? X86_64_SSE_REGPARM_MAX
6571 : X86_64_MS_SSE_REGPARM_MAX);
6574 if (TARGET_MMX)
6575 cum->mmx_nregs = MMX_REGPARM_MAX;
6576 cum->warn_avx512f = true;
6577 cum->warn_avx = true;
6578 cum->warn_sse = true;
6579 cum->warn_mmx = true;
6581 /* Because types might mismatch between caller and callee, we need to
6582 use the actual type of the function for local calls.
6583 FIXME: cgraph_analyze can be told to actually record if a function uses
6584 va_start, so for local functions maybe_vaarg can be made more aggressive,
6585 helping K&R code.
6586 FIXME: once the type system is fixed, we won't need this code anymore. */
6587 if (i && i->local && i->can_change_signature)
6588 fntype = TREE_TYPE (target->decl);
6589 cum->stdarg = stdarg_p (fntype);
6590 cum->maybe_vaarg = (fntype
6591 ? (!prototype_p (fntype) || stdarg_p (fntype))
6592 : !libname);
6594 cum->bnd_regno = FIRST_BND_REG;
6595 cum->bnds_in_bt = 0;
6596 cum->force_bnd_pass = 0;
6597 cum->decl = fndecl;
6599 if (!TARGET_64BIT)
6601 /* If there are variable arguments, then we won't pass anything
6602 in registers in 32-bit mode. */
6603 if (stdarg_p (fntype))
6605 cum->nregs = 0;
6606 /* Since in 32-bit mode variable arguments are always passed on
6607 the stack, there is a scratch register available for an indirect
6608 sibcall. */
6609 cfun->machine->arg_reg_available = true;
6610 cum->sse_nregs = 0;
6611 cum->mmx_nregs = 0;
6612 cum->warn_avx512f = false;
6613 cum->warn_avx = false;
6614 cum->warn_sse = false;
6615 cum->warn_mmx = false;
6616 return;
6619 /* Use ecx and edx registers if function has fastcall attribute,
6620 else look for regparm information. */
6621 if (fntype)
6623 unsigned int ccvt = ix86_get_callcvt (fntype);
6624 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6626 cum->nregs = 1;
6627 cum->fastcall = 1; /* Same first register as in fastcall. */
6629 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6631 cum->nregs = 2;
6632 cum->fastcall = 1;
6634 else
6635 cum->nregs = ix86_function_regparm (fntype, fndecl);
6638 /* Set up the number of SSE registers used for passing SFmode
6639 and DFmode arguments. Warn for mismatching ABI. */
6640 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6643 cfun->machine->arg_reg_available = (cum->nregs > 0);
6646 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6647 But in the case of vector types, it is some vector mode.
6649 When we have only some of our vector isa extensions enabled, then there
6650 are some modes for which vector_mode_supported_p is false. For these
6651 modes, the generic vector support in gcc will choose some non-vector mode
6652 in order to implement the type. By computing the natural mode, we'll
6653 select the proper ABI location for the operand and not depend on whatever
6654 the middle-end decides to do with these vector types.
6656 The middle-end can't deal with vector types > 16 bytes. In this
6657 case, we return the original mode and warn about the ABI change if CUM isn't
6658 NULL.
6660 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6661 available for the function return value. */
6663 static machine_mode
6664 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6665 bool in_return)
6667 machine_mode mode = TYPE_MODE (type);
6669 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6671 HOST_WIDE_INT size = int_size_in_bytes (type);
6672 if ((size == 8 || size == 16 || size == 32 || size == 64)
6673 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6674 && TYPE_VECTOR_SUBPARTS (type) > 1)
6676 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6678 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6679 mode = MIN_MODE_VECTOR_FLOAT;
6680 else
6681 mode = MIN_MODE_VECTOR_INT;
6683 /* Get the mode which has this inner mode and number of units. */
6684 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6685 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6686 && GET_MODE_INNER (mode) == innermode)
6688 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
6690 static bool warnedavx512f;
6691 static bool warnedavx512f_ret;
6693 if (cum && cum->warn_avx512f && !warnedavx512f)
6695 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6696 "without AVX512F enabled changes the ABI"))
6697 warnedavx512f = true;
6699 else if (in_return && !warnedavx512f_ret)
6701 if (warning (OPT_Wpsabi, "AVX512F vector return "
6702 "without AVX512F enabled changes the ABI"))
6703 warnedavx512f_ret = true;
6706 return TYPE_MODE (type);
6708 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
6710 static bool warnedavx;
6711 static bool warnedavx_ret;
6713 if (cum && cum->warn_avx && !warnedavx)
6715 if (warning (OPT_Wpsabi, "AVX vector argument "
6716 "without AVX enabled changes the ABI"))
6717 warnedavx = true;
6719 else if (in_return && !warnedavx_ret)
6721 if (warning (OPT_Wpsabi, "AVX vector return "
6722 "without AVX enabled changes the ABI"))
6723 warnedavx_ret = true;
6726 return TYPE_MODE (type);
6728 else if (((size == 8 && TARGET_64BIT) || size == 16)
6729 && !TARGET_SSE
6730 && !TARGET_IAMCU)
6732 static bool warnedsse;
6733 static bool warnedsse_ret;
6735 if (cum && cum->warn_sse && !warnedsse)
6737 if (warning (OPT_Wpsabi, "SSE vector argument "
6738 "without SSE enabled changes the ABI"))
6739 warnedsse = true;
6741 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6743 if (warning (OPT_Wpsabi, "SSE vector return "
6744 "without SSE enabled changes the ABI"))
6745 warnedsse_ret = true;
6748 else if ((size == 8 && !TARGET_64BIT)
6749 && !TARGET_MMX
6750 && !TARGET_IAMCU)
6752 static bool warnedmmx;
6753 static bool warnedmmx_ret;
6755 if (cum && cum->warn_mmx && !warnedmmx)
6757 if (warning (OPT_Wpsabi, "MMX vector argument "
6758 "without MMX enabled changes the ABI"))
6759 warnedmmx = true;
6761 else if (in_return && !warnedmmx_ret)
6763 if (warning (OPT_Wpsabi, "MMX vector return "
6764 "without MMX enabled changes the ABI"))
6765 warnedmmx_ret = true;
6768 return mode;
6771 gcc_unreachable ();
6775 return mode;
6778 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6779 this may not agree with the mode that the type system has chosen for the
6780 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6781 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6783 static rtx
6784 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6785 unsigned int regno)
6787 rtx tmp;
6789 if (orig_mode != BLKmode)
6790 tmp = gen_rtx_REG (orig_mode, regno);
6791 else
6793 tmp = gen_rtx_REG (mode, regno);
6794 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6795 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6798 return tmp;
6801 /* x86-64 register passing implementation. See the x86-64 psABI for details.
6802 The goal of this code is to classify each eightbyte of an incoming argument
6803 by register class and assign registers accordingly. */
6805 /* Return the union class of CLASS1 and CLASS2.
6806 See the x86-64 PS ABI for details. */
6808 static enum x86_64_reg_class
6809 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6811 /* Rule #1: If both classes are equal, this is the resulting class. */
6812 if (class1 == class2)
6813 return class1;
6815 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6816 the other class. */
6817 if (class1 == X86_64_NO_CLASS)
6818 return class2;
6819 if (class2 == X86_64_NO_CLASS)
6820 return class1;
6822 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6823 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6824 return X86_64_MEMORY_CLASS;
6826 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6827 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6828 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6829 return X86_64_INTEGERSI_CLASS;
6830 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6831 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6832 return X86_64_INTEGER_CLASS;
6834 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6835 MEMORY is used. */
6836 if (class1 == X86_64_X87_CLASS
6837 || class1 == X86_64_X87UP_CLASS
6838 || class1 == X86_64_COMPLEX_X87_CLASS
6839 || class2 == X86_64_X87_CLASS
6840 || class2 == X86_64_X87UP_CLASS
6841 || class2 == X86_64_COMPLEX_X87_CLASS)
6842 return X86_64_MEMORY_CLASS;
6844 /* Rule #6: Otherwise class SSE is used. */
6845 return X86_64_SSE_CLASS;
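/* A minimal illustrative sketch, not part of the back end: the result of
   applying the classification rules implemented by merge_classes above and
   classify_argument below to a few concrete SysV x86-64 types.  The strings
   are informal class labels, and the table exists only to make the rules
   concrete.  */
static const struct
{
  const char *type;        /* C type being classified */
  const char *eightbytes;  /* resulting class of each 8-byte chunk */
} example_psabi_classes[] =
{
  { "long",                        "INTEGER" },
  { "double",                      "SSE" },
  { "struct { double d; int i; }", "SSE, INTEGER" },
  { "__int128",                    "INTEGER, INTEGER" },
  { "long double",                 "X87, X87UP" },
};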
6848 /* Classify the argument of type TYPE and mode MODE.
6849 CLASSES will be filled by the register class used to pass each word
6850 of the operand. The number of words is returned. In case the parameter
6851 should be passed in memory, 0 is returned. As a special case for zero
6852 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6854 BIT_OFFSET is used internally for handling records and specifies the
6855 offset in bits modulo 512 to avoid overflow cases.
6857 See the x86-64 PS ABI for details.
6860 static int
6861 classify_argument (machine_mode mode, const_tree type,
6862 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6864 HOST_WIDE_INT bytes =
6865 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6866 int words
6867 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6869 /* Variable sized entities are always passed/returned in memory. */
6870 if (bytes < 0)
6871 return 0;
6873 if (mode != VOIDmode
6874 && targetm.calls.must_pass_in_stack (mode, type))
6875 return 0;
6877 /* Special case check for pointer to shared, on 64-bit target. */
6878 if (TARGET_64BIT && mode == TImode
6879 && type && TREE_CODE (type) == POINTER_TYPE
6880 && upc_shared_type_p (TREE_TYPE (type)))
6882 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6883 return 2;
6886 if (type && AGGREGATE_TYPE_P (type))
6888 int i;
6889 tree field;
6890 enum x86_64_reg_class subclasses[MAX_CLASSES];
6892 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6893 if (bytes > 64)
6894 return 0;
6896 for (i = 0; i < words; i++)
6897 classes[i] = X86_64_NO_CLASS;
6899 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6900 signal the memory class, so handle this as a special case. */
6901 if (!words)
6903 classes[0] = X86_64_NO_CLASS;
6904 return 1;
6907 /* Classify each field of record and merge classes. */
6908 switch (TREE_CODE (type))
6910 case RECORD_TYPE:
6911 /* And now merge the fields of structure. */
6912 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6914 if (TREE_CODE (field) == FIELD_DECL)
6916 int num;
6918 if (TREE_TYPE (field) == error_mark_node)
6919 continue;
6921 /* Bitfields are always classified as integer. Handle them
6922 early, since later code would consider them to be
6923 misaligned integers. */
6924 if (DECL_BIT_FIELD (field))
6926 for (i = (int_bit_position (field)
6927 + (bit_offset % 64)) / 8 / 8;
6928 i < ((int_bit_position (field) + (bit_offset % 64))
6929 + tree_to_shwi (DECL_SIZE (field))
6930 + 63) / 8 / 8; i++)
6931 classes[i] =
6932 merge_classes (X86_64_INTEGER_CLASS,
6933 classes[i]);
6935 else
6937 int pos;
6939 type = TREE_TYPE (field);
6941 /* Flexible array member is ignored. */
6942 if (TYPE_MODE (type) == BLKmode
6943 && TREE_CODE (type) == ARRAY_TYPE
6944 && TYPE_SIZE (type) == NULL_TREE
6945 && TYPE_DOMAIN (type) != NULL_TREE
6946 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6947 == NULL_TREE))
6949 static bool warned;
6951 if (!warned && warn_psabi)
6953 warned = true;
6954 inform (input_location,
6955 "the ABI of passing struct with"
6956 " a flexible array member has"
6957 " changed in GCC 4.4");
6959 continue;
6961 num = classify_argument (TYPE_MODE (type), type,
6962 subclasses,
6963 (int_bit_position (field)
6964 + bit_offset) % 512);
6965 if (!num)
6966 return 0;
6967 pos = (int_bit_position (field)
6968 + (bit_offset % 64)) / 8 / 8;
6969 for (i = 0; i < num && (i + pos) < words; i++)
6970 classes[i + pos] =
6971 merge_classes (subclasses[i], classes[i + pos]);
6975 break;
6977 case ARRAY_TYPE:
6978 /* Arrays are handled as small records. */
6980 int num;
6981 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6982 TREE_TYPE (type), subclasses, bit_offset);
6983 if (!num)
6984 return 0;
6986 /* The partial classes are now full classes. */
6987 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6988 subclasses[0] = X86_64_SSE_CLASS;
6989 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6990 && !((bit_offset % 64) == 0 && bytes == 4))
6991 subclasses[0] = X86_64_INTEGER_CLASS;
6993 for (i = 0; i < words; i++)
6994 classes[i] = subclasses[i % num];
6996 break;
6998 case UNION_TYPE:
6999 case QUAL_UNION_TYPE:
7000 /* Unions are similar to RECORD_TYPE but offset is always 0.
7002 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7004 if (TREE_CODE (field) == FIELD_DECL)
7006 int num;
7008 if (TREE_TYPE (field) == error_mark_node)
7009 continue;
7011 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
7012 TREE_TYPE (field), subclasses,
7013 bit_offset);
7014 if (!num)
7015 return 0;
7016 for (i = 0; i < num && i < words; i++)
7017 classes[i] = merge_classes (subclasses[i], classes[i]);
7020 break;
7022 default:
7023 gcc_unreachable ();
7026 if (words > 2)
7028 /* When size > 16 bytes, if the first one isn't
7029 X86_64_SSE_CLASS or any other ones aren't
7030 X86_64_SSEUP_CLASS, everything should be passed in
7031 memory. */
7032 if (classes[0] != X86_64_SSE_CLASS)
7033 return 0;
7035 for (i = 1; i < words; i++)
7036 if (classes[i] != X86_64_SSEUP_CLASS)
7037 return 0;
7040 /* Final merger cleanup. */
7041 for (i = 0; i < words; i++)
7043 /* If one class is MEMORY, everything should be passed in
7044 memory. */
7045 if (classes[i] == X86_64_MEMORY_CLASS)
7046 return 0;
7048 /* The X86_64_SSEUP_CLASS should always be preceded by
7049 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
7050 if (classes[i] == X86_64_SSEUP_CLASS
7051 && classes[i - 1] != X86_64_SSE_CLASS
7052 && classes[i - 1] != X86_64_SSEUP_CLASS)
7054 /* The first one should never be X86_64_SSEUP_CLASS. */
7055 gcc_assert (i != 0);
7056 classes[i] = X86_64_SSE_CLASS;
7059 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
7060 everything should be passed in memory. */
7061 if (classes[i] == X86_64_X87UP_CLASS
7062 && (classes[i - 1] != X86_64_X87_CLASS))
7064 static bool warned;
7066 /* The first one should never be X86_64_X87UP_CLASS. */
7067 gcc_assert (i != 0);
7068 if (!warned && warn_psabi)
7070 warned = true;
7071 inform (input_location,
7072 "the ABI of passing union with long double"
7073 " has changed in GCC 4.4");
7075 return 0;
7078 return words;
7081 /* Compute the alignment needed. We align all types to their natural
7082 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
7083 if (mode != VOIDmode && mode != BLKmode)
7085 int mode_alignment = GET_MODE_BITSIZE (mode);
7087 if (mode == XFmode)
7088 mode_alignment = 128;
7089 else if (mode == XCmode)
7090 mode_alignment = 256;
7091 if (COMPLEX_MODE_P (mode))
7092 mode_alignment /= 2;
7093 /* Misaligned fields are always returned in memory. */
7094 if (bit_offset % mode_alignment)
7095 return 0;
7098 /* for V1xx modes, just use the base mode */
7099 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7100 && GET_MODE_UNIT_SIZE (mode) == bytes)
7101 mode = GET_MODE_INNER (mode);
7103 /* Classification of atomic types. */
7104 switch (mode)
7106 case SDmode:
7107 case DDmode:
7108 classes[0] = X86_64_SSE_CLASS;
7109 return 1;
7110 case TDmode:
7111 classes[0] = X86_64_SSE_CLASS;
7112 classes[1] = X86_64_SSEUP_CLASS;
7113 return 2;
7114 case DImode:
7115 case SImode:
7116 case HImode:
7117 case QImode:
7118 case CSImode:
7119 case CHImode:
7120 case CQImode:
7122 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7124 /* Analyze last 128 bits only. */
7125 size = (size - 1) & 0x7f;
7127 if (size < 32)
7129 classes[0] = X86_64_INTEGERSI_CLASS;
7130 return 1;
7132 else if (size < 64)
7134 classes[0] = X86_64_INTEGER_CLASS;
7135 return 1;
7137 else if (size < 64+32)
7139 classes[0] = X86_64_INTEGER_CLASS;
7140 classes[1] = X86_64_INTEGERSI_CLASS;
7141 return 2;
7143 else if (size < 64+64)
7145 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7146 return 2;
7148 else
7149 gcc_unreachable ();
7151 case CDImode:
7152 case TImode:
7153 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7154 return 2;
7155 case COImode:
7156 case OImode:
7157 /* OImode shouldn't be used directly. */
7158 gcc_unreachable ();
7159 case CTImode:
7160 return 0;
7161 case SFmode:
7162 if (!(bit_offset % 64))
7163 classes[0] = X86_64_SSESF_CLASS;
7164 else
7165 classes[0] = X86_64_SSE_CLASS;
7166 return 1;
7167 case DFmode:
7168 classes[0] = X86_64_SSEDF_CLASS;
7169 return 1;
7170 case XFmode:
7171 classes[0] = X86_64_X87_CLASS;
7172 classes[1] = X86_64_X87UP_CLASS;
7173 return 2;
7174 case TFmode:
7175 classes[0] = X86_64_SSE_CLASS;
7176 classes[1] = X86_64_SSEUP_CLASS;
7177 return 2;
7178 case SCmode:
7179 classes[0] = X86_64_SSE_CLASS;
7180 if (!(bit_offset % 64))
7181 return 1;
7182 else
7184 static bool warned;
7186 if (!warned && warn_psabi)
7188 warned = true;
7189 inform (input_location,
7190 "the ABI of passing structure with complex float"
7191 " member has changed in GCC 4.4");
7193 classes[1] = X86_64_SSESF_CLASS;
7194 return 2;
7196 case DCmode:
7197 classes[0] = X86_64_SSEDF_CLASS;
7198 classes[1] = X86_64_SSEDF_CLASS;
7199 return 2;
7200 case XCmode:
7201 classes[0] = X86_64_COMPLEX_X87_CLASS;
7202 return 1;
7203 case TCmode:
7204 /* This mode is larger than 16 bytes. */
7205 return 0;
7206 case V8SFmode:
7207 case V8SImode:
7208 case V32QImode:
7209 case V16HImode:
7210 case V4DFmode:
7211 case V4DImode:
7212 classes[0] = X86_64_SSE_CLASS;
7213 classes[1] = X86_64_SSEUP_CLASS;
7214 classes[2] = X86_64_SSEUP_CLASS;
7215 classes[3] = X86_64_SSEUP_CLASS;
7216 return 4;
7217 case V8DFmode:
7218 case V16SFmode:
7219 case V8DImode:
7220 case V16SImode:
7221 case V32HImode:
7222 case V64QImode:
7223 classes[0] = X86_64_SSE_CLASS;
7224 classes[1] = X86_64_SSEUP_CLASS;
7225 classes[2] = X86_64_SSEUP_CLASS;
7226 classes[3] = X86_64_SSEUP_CLASS;
7227 classes[4] = X86_64_SSEUP_CLASS;
7228 classes[5] = X86_64_SSEUP_CLASS;
7229 classes[6] = X86_64_SSEUP_CLASS;
7230 classes[7] = X86_64_SSEUP_CLASS;
7231 return 8;
7232 case V4SFmode:
7233 case V4SImode:
7234 case V16QImode:
7235 case V8HImode:
7236 case V2DFmode:
7237 case V2DImode:
7238 classes[0] = X86_64_SSE_CLASS;
7239 classes[1] = X86_64_SSEUP_CLASS;
7240 return 2;
7241 case V1TImode:
7242 case V1DImode:
7243 case V2SFmode:
7244 case V2SImode:
7245 case V4HImode:
7246 case V8QImode:
7247 classes[0] = X86_64_SSE_CLASS;
7248 return 1;
7249 case BLKmode:
7250 case VOIDmode:
7251 return 0;
7252 default:
7253 gcc_assert (VECTOR_MODE_P (mode));
7255 if (bytes > 16)
7256 return 0;
7258 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7260 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7261 classes[0] = X86_64_INTEGERSI_CLASS;
7262 else
7263 classes[0] = X86_64_INTEGER_CLASS;
7264 classes[1] = X86_64_INTEGER_CLASS;
7265 return 1 + (bytes > 8);
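/* A minimal illustrative sketch, not part of the back end: the eightbyte
   index computed for record fields in classify_argument above, mirroring
   the expression (int_bit_position (field) + bit_offset % 64) / 8 / 8 —
   bits are first converted to bytes and then to 8-byte chunks.  The
   function name is a placeholder for illustration only.  */
static int
example_eightbyte_index (int field_bit_position, int bit_offset)
{
  return (field_bit_position + bit_offset % 64) / 8 / 8;
}
/* e.g. a field at bit 64 of a struct passed at offset 0 lands in
   eightbyte 1: example_eightbyte_index (64, 0) == 1.  */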
7269 /* Examine the argument and set the number of registers required in each
7270 class. Return true iff the parameter should be passed in memory. */
7272 static bool
7273 examine_argument (machine_mode mode, const_tree type, int in_return,
7274 int *int_nregs, int *sse_nregs)
7276 enum x86_64_reg_class regclass[MAX_CLASSES];
7277 int n = classify_argument (mode, type, regclass, 0);
7279 *int_nregs = 0;
7280 *sse_nregs = 0;
7282 if (!n)
7283 return true;
7284 for (n--; n >= 0; n--)
7285 switch (regclass[n])
7287 case X86_64_INTEGER_CLASS:
7288 case X86_64_INTEGERSI_CLASS:
7289 (*int_nregs)++;
7290 break;
7291 case X86_64_SSE_CLASS:
7292 case X86_64_SSESF_CLASS:
7293 case X86_64_SSEDF_CLASS:
7294 (*sse_nregs)++;
7295 break;
7296 case X86_64_NO_CLASS:
7297 case X86_64_SSEUP_CLASS:
7298 break;
7299 case X86_64_X87_CLASS:
7300 case X86_64_X87UP_CLASS:
7301 case X86_64_COMPLEX_X87_CLASS:
7302 if (!in_return)
7303 return true;
7304 break;
7305 case X86_64_MEMORY_CLASS:
7306 gcc_unreachable ();
7309 return false;
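/* A minimal illustrative sketch, not part of the back end: the typical way
   the two counters filled in by examine_argument are consumed, mirroring
   function_arg_advance_64 further below.  All names here are placeholders
   for illustration only.  */
struct example_cum { int nregs, sse_nregs; };

static bool
example_fits_in_registers (struct example_cum *cum, bool in_memory,
                           int int_nregs, int sse_nregs)
{
  /* Passed in memory, or not enough free registers of either kind.  */
  if (in_memory || int_nregs > cum->nregs || sse_nregs > cum->sse_nregs)
    return false;
  cum->nregs -= int_nregs;      /* consume integer registers */
  cum->sse_nregs -= sse_nregs;  /* consume SSE registers */
  return true;
}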
7312 /* Construct container for the argument used by GCC interface. See
7313 FUNCTION_ARG for the detailed description. */
7315 static rtx
7316 construct_container (machine_mode mode, machine_mode orig_mode,
7317 const_tree type, int in_return, int nintregs, int nsseregs,
7318 const int *intreg, int sse_regno)
7320 /* The following variables hold the static issued_error state. */
7321 static bool issued_sse_arg_error;
7322 static bool issued_sse_ret_error;
7323 static bool issued_x87_ret_error;
7325 machine_mode tmpmode;
7326 int bytes =
7327 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7328 enum x86_64_reg_class regclass[MAX_CLASSES];
7329 int n;
7330 int i;
7331 int nexps = 0;
7332 int needed_sseregs, needed_intregs;
7333 rtx exp[MAX_CLASSES];
7334 rtx ret;
7336 n = classify_argument (mode, type, regclass, 0);
7337 if (!n)
7338 return NULL;
7339 if (examine_argument (mode, type, in_return, &needed_intregs,
7340 &needed_sseregs))
7341 return NULL;
7342 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7343 return NULL;
7345 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7346 some less clueful developer tries to use floating-point anyway. */
7347 if (needed_sseregs && !TARGET_SSE)
7349 if (in_return)
7351 if (!issued_sse_ret_error)
7353 error ("SSE register return with SSE disabled");
7354 issued_sse_ret_error = true;
7357 else if (!issued_sse_arg_error)
7359 error ("SSE register argument with SSE disabled");
7360 issued_sse_arg_error = true;
7362 return NULL;
7365 /* Likewise, error if the ABI requires us to return values in the
7366 x87 registers and the user specified -mno-80387. */
7367 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7368 for (i = 0; i < n; i++)
7369 if (regclass[i] == X86_64_X87_CLASS
7370 || regclass[i] == X86_64_X87UP_CLASS
7371 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7373 if (!issued_x87_ret_error)
7375 error ("x87 register return with x87 disabled");
7376 issued_x87_ret_error = true;
7378 return NULL;
7381 /* First construct simple cases. Avoid SCmode, since we want to use
7382 single register to pass this type. */
7383 if (n == 1 && mode != SCmode)
7384 switch (regclass[0])
7386 case X86_64_INTEGER_CLASS:
7387 case X86_64_INTEGERSI_CLASS:
7388 return gen_rtx_REG (mode, intreg[0]);
7389 case X86_64_SSE_CLASS:
7390 case X86_64_SSESF_CLASS:
7391 case X86_64_SSEDF_CLASS:
7392 if (mode != BLKmode)
7393 return gen_reg_or_parallel (mode, orig_mode,
7394 SSE_REGNO (sse_regno));
7395 break;
7396 case X86_64_X87_CLASS:
7397 case X86_64_COMPLEX_X87_CLASS:
7398 return gen_rtx_REG (mode, FIRST_STACK_REG);
7399 case X86_64_NO_CLASS:
7400 /* Zero sized array, struct or class. */
7401 return NULL;
7402 default:
7403 gcc_unreachable ();
7405 if (n == 2
7406 && regclass[0] == X86_64_SSE_CLASS
7407 && regclass[1] == X86_64_SSEUP_CLASS
7408 && mode != BLKmode)
7409 return gen_reg_or_parallel (mode, orig_mode,
7410 SSE_REGNO (sse_regno));
7411 if (n == 4
7412 && regclass[0] == X86_64_SSE_CLASS
7413 && regclass[1] == X86_64_SSEUP_CLASS
7414 && regclass[2] == X86_64_SSEUP_CLASS
7415 && regclass[3] == X86_64_SSEUP_CLASS
7416 && mode != BLKmode)
7417 return gen_reg_or_parallel (mode, orig_mode,
7418 SSE_REGNO (sse_regno));
7419 if (n == 8
7420 && regclass[0] == X86_64_SSE_CLASS
7421 && regclass[1] == X86_64_SSEUP_CLASS
7422 && regclass[2] == X86_64_SSEUP_CLASS
7423 && regclass[3] == X86_64_SSEUP_CLASS
7424 && regclass[4] == X86_64_SSEUP_CLASS
7425 && regclass[5] == X86_64_SSEUP_CLASS
7426 && regclass[6] == X86_64_SSEUP_CLASS
7427 && regclass[7] == X86_64_SSEUP_CLASS
7428 && mode != BLKmode)
7429 return gen_reg_or_parallel (mode, orig_mode,
7430 SSE_REGNO (sse_regno));
7431 if (n == 2
7432 && regclass[0] == X86_64_X87_CLASS
7433 && regclass[1] == X86_64_X87UP_CLASS)
7434 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7436 if (n == 2
7437 && regclass[0] == X86_64_INTEGER_CLASS
7438 && regclass[1] == X86_64_INTEGER_CLASS
7439 && (mode == CDImode || mode == TImode)
7440 && intreg[0] + 1 == intreg[1])
7441 return gen_rtx_REG (mode, intreg[0]);
7443 /* Otherwise figure out the entries of the PARALLEL. */
7444 for (i = 0; i < n; i++)
7446 int pos;
7448 switch (regclass[i])
7450 case X86_64_NO_CLASS:
7451 break;
7452 case X86_64_INTEGER_CLASS:
7453 case X86_64_INTEGERSI_CLASS:
7454 /* Merge TImodes on aligned occasions here too. */
7455 if (i * 8 + 8 > bytes)
7456 tmpmode
7457 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7458 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7459 tmpmode = SImode;
7460 else
7461 tmpmode = DImode;
7462 /* We've requested 24 bytes for which we
7463 don't have a mode. Use DImode. */
7464 if (tmpmode == BLKmode)
7465 tmpmode = DImode;
7466 exp [nexps++]
7467 = gen_rtx_EXPR_LIST (VOIDmode,
7468 gen_rtx_REG (tmpmode, *intreg),
7469 GEN_INT (i*8));
7470 intreg++;
7471 break;
7472 case X86_64_SSESF_CLASS:
7473 exp [nexps++]
7474 = gen_rtx_EXPR_LIST (VOIDmode,
7475 gen_rtx_REG (SFmode,
7476 SSE_REGNO (sse_regno)),
7477 GEN_INT (i*8));
7478 sse_regno++;
7479 break;
7480 case X86_64_SSEDF_CLASS:
7481 exp [nexps++]
7482 = gen_rtx_EXPR_LIST (VOIDmode,
7483 gen_rtx_REG (DFmode,
7484 SSE_REGNO (sse_regno)),
7485 GEN_INT (i*8));
7486 sse_regno++;
7487 break;
7488 case X86_64_SSE_CLASS:
7489 pos = i;
7490 switch (n)
7492 case 1:
7493 tmpmode = DImode;
7494 break;
7495 case 2:
7496 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7498 tmpmode = TImode;
7499 i++;
7501 else
7502 tmpmode = DImode;
7503 break;
7504 case 4:
7505 gcc_assert (i == 0
7506 && regclass[1] == X86_64_SSEUP_CLASS
7507 && regclass[2] == X86_64_SSEUP_CLASS
7508 && regclass[3] == X86_64_SSEUP_CLASS);
7509 tmpmode = OImode;
7510 i += 3;
7511 break;
7512 case 8:
7513 gcc_assert (i == 0
7514 && regclass[1] == X86_64_SSEUP_CLASS
7515 && regclass[2] == X86_64_SSEUP_CLASS
7516 && regclass[3] == X86_64_SSEUP_CLASS
7517 && regclass[4] == X86_64_SSEUP_CLASS
7518 && regclass[5] == X86_64_SSEUP_CLASS
7519 && regclass[6] == X86_64_SSEUP_CLASS
7520 && regclass[7] == X86_64_SSEUP_CLASS);
7521 tmpmode = XImode;
7522 i += 7;
7523 break;
7524 default:
7525 gcc_unreachable ();
7527 exp [nexps++]
7528 = gen_rtx_EXPR_LIST (VOIDmode,
7529 gen_rtx_REG (tmpmode,
7530 SSE_REGNO (sse_regno)),
7531 GEN_INT (pos*8));
7532 sse_regno++;
7533 break;
7534 default:
7535 gcc_unreachable ();
7539 /* Empty aligned struct, union or class. */
7540 if (nexps == 0)
7541 return NULL;
7543 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7544 for (i = 0; i < nexps; i++)
7545 XVECEXP (ret, 0, i) = exp [i];
7546 return ret;
7549 /* Update the data in CUM to advance over an argument of mode MODE
7550 and data type TYPE. (TYPE is null for libcalls where that information
7551 may not be available.)
7553 Return the number of integer registers advanced over. */
7555 static int
7556 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7557 const_tree type, HOST_WIDE_INT bytes,
7558 HOST_WIDE_INT words)
7560 int res = 0;
7561 bool error_p = false;
7563 if (TARGET_IAMCU)
7565 /* Intel MCU psABI passes scalars and aggregates no larger than 8
7566 bytes in registers. */
7567 if (!VECTOR_MODE_P (mode) && bytes <= 8)
7568 goto pass_in_reg;
7569 return res;
7572 switch (mode)
7574 default:
7575 break;
7577 case BLKmode:
7578 if (bytes < 0)
7579 break;
7580 /* FALLTHRU */
7582 case DImode:
7583 case SImode:
7584 case HImode:
7585 case QImode:
7586 pass_in_reg:
7587 cum->words += words;
7588 cum->nregs -= words;
7589 cum->regno += words;
7590 if (cum->nregs >= 0)
7591 res = words;
7592 if (cum->nregs <= 0)
7594 cum->nregs = 0;
7595 cfun->machine->arg_reg_available = false;
7596 cum->regno = 0;
7598 break;
7600 case OImode:
7601 /* OImode shouldn't be used directly. */
7602 gcc_unreachable ();
7604 case DFmode:
7605 if (cum->float_in_sse == -1)
7606 error_p = 1;
7607 if (cum->float_in_sse < 2)
7608 break;
7609 case SFmode:
7610 if (cum->float_in_sse == -1)
7611 error_p = 1;
7612 if (cum->float_in_sse < 1)
7613 break;
7614 /* FALLTHRU */
7616 case V8SFmode:
7617 case V8SImode:
7618 case V64QImode:
7619 case V32HImode:
7620 case V16SImode:
7621 case V8DImode:
7622 case V16SFmode:
7623 case V8DFmode:
7624 case V32QImode:
7625 case V16HImode:
7626 case V4DFmode:
7627 case V4DImode:
7628 case TImode:
7629 case V16QImode:
7630 case V8HImode:
7631 case V4SImode:
7632 case V2DImode:
7633 case V4SFmode:
7634 case V2DFmode:
7635 if (!type || !AGGREGATE_TYPE_P (type))
7637 cum->sse_words += words;
7638 cum->sse_nregs -= 1;
7639 cum->sse_regno += 1;
7640 if (cum->sse_nregs <= 0)
7642 cum->sse_nregs = 0;
7643 cum->sse_regno = 0;
7646 break;
7648 case V8QImode:
7649 case V4HImode:
7650 case V2SImode:
7651 case V2SFmode:
7652 case V1TImode:
7653 case V1DImode:
7654 if (!type || !AGGREGATE_TYPE_P (type))
7656 cum->mmx_words += words;
7657 cum->mmx_nregs -= 1;
7658 cum->mmx_regno += 1;
7659 if (cum->mmx_nregs <= 0)
7661 cum->mmx_nregs = 0;
7662 cum->mmx_regno = 0;
7665 break;
7667 if (error_p)
7669 cum->float_in_sse = 0;
7670 error ("calling %qD with SSE calling convention without "
7671 "SSE/SSE2 enabled", cum->decl);
7672 sorry ("this is a GCC bug that can be worked around by adding "
7673 "attribute used to function called");
7676 return res;
7679 static int
7680 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7681 const_tree type, HOST_WIDE_INT words, bool named)
7683 int int_nregs, sse_nregs;
7685 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7686 if (!named && (VALID_AVX512F_REG_MODE (mode)
7687 || VALID_AVX256_REG_MODE (mode)))
7688 return 0;
7690 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7691 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7693 cum->nregs -= int_nregs;
7694 cum->sse_nregs -= sse_nregs;
7695 cum->regno += int_nregs;
7696 cum->sse_regno += sse_nregs;
7697 return int_nregs;
7699 else
7701 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7702 cum->words = (cum->words + align - 1) & ~(align - 1);
7703 cum->words += words;
7704 return 0;
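/* A minimal illustrative sketch, not part of the back end: the rounding
   idiom used above, (words + align - 1) & ~(align - 1), rounds WORDS up to
   the next multiple of ALIGN, assuming ALIGN is a power of two.  The
   function name is a placeholder for illustration only.  */
static unsigned int
example_round_up_pow2 (unsigned int words, unsigned int align)
{
  return (words + align - 1) & ~(align - 1);
}
/* e.g. example_round_up_pow2 (3, 2) == 4 and example_round_up_pow2 (4, 2) == 4.  */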
7708 static int
7709 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7710 HOST_WIDE_INT words)
7712 /* Otherwise, this should be passed indirectly. */
7713 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7715 cum->words += words;
7716 if (cum->nregs > 0)
7718 cum->nregs -= 1;
7719 cum->regno += 1;
7720 return 1;
7722 return 0;
7725 /* Update the data in CUM to advance over an argument of mode MODE and
7726 data type TYPE. (TYPE is null for libcalls where that information
7727 may not be available.) */
7729 static void
7730 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7731 const_tree type, bool named)
7733 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7734 HOST_WIDE_INT bytes, words;
7735 int nregs;
7737 if (mode == BLKmode)
7738 bytes = int_size_in_bytes (type);
7739 else
7740 bytes = GET_MODE_SIZE (mode);
7741 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7743 if (type)
7744 mode = type_natural_mode (type, NULL, false);
7746 if ((type && POINTER_BOUNDS_TYPE_P (type))
7747 || POINTER_BOUNDS_MODE_P (mode))
7749 /* If we pass bounds in the Bounds Table then just update the remaining bounds count. */
7750 if (cum->bnds_in_bt)
7752 cum->bnds_in_bt--;
7753 return;
7756 /* Update the remaining number of bounds to force. */
7757 if (cum->force_bnd_pass)
7758 cum->force_bnd_pass--;
7760 cum->bnd_regno++;
7762 return;
7765 /* The first arg not going to Bounds Tables resets this counter. */
7766 cum->bnds_in_bt = 0;
7767 /* For unnamed args we always pass bounds to avoid a bounds mess when
7768 passed and received types do not match. If bounds do not follow an
7769 unnamed arg, still pretend the required number of bounds were passed. */
7770 if (cum->force_bnd_pass)
7772 cum->bnd_regno += cum->force_bnd_pass;
7773 cum->force_bnd_pass = 0;
7776 if (TARGET_64BIT)
7778 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
7780 if (call_abi == MS_ABI)
7781 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7782 else
7783 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7785 else
7786 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7788 /* For stdarg we expect bounds to be passed for each value passed
7789 in register. */
7790 if (cum->stdarg)
7791 cum->force_bnd_pass = nregs;
7792 /* For pointers passed in memory we expect bounds passed in Bounds
7793 Table. */
7794 if (!nregs)
7795 cum->bnds_in_bt = chkp_type_bounds_count (type);
7798 /* Define where to put the arguments to a function.
7799 Value is zero to push the argument on the stack,
7800 or a hard register in which to store the argument.
7802 MODE is the argument's machine mode.
7803 TYPE is the data type of the argument (as a tree).
7804 This is null for libcalls where that information may
7805 not be available.
7806 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7807 the preceding args and about the function being called.
7808 NAMED is nonzero if this argument is a named parameter
7809 (otherwise it is an extra parameter matching an ellipsis). */
7811 static rtx
7812 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7813 machine_mode orig_mode, const_tree type,
7814 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7816 bool error_p = false;
7817 /* Avoid the AL settings for the Unix64 ABI. */
7818 if (mode == VOIDmode)
7819 return constm1_rtx;
7821 if (TARGET_IAMCU)
7823 /* Intel MCU psABI passes scalars and aggregates no larger than 8
7824 bytes in registers. */
7825 if (!VECTOR_MODE_P (mode) && bytes <= 8)
7826 goto pass_in_reg;
7827 return NULL_RTX;
7830 switch (mode)
7832 default:
7833 break;
7835 case BLKmode:
7836 if (bytes < 0)
7837 break;
7838 /* FALLTHRU */
7839 case DImode:
7840 case SImode:
7841 case HImode:
7842 case QImode:
7843 pass_in_reg:
7844 if (words <= cum->nregs)
7846 int regno = cum->regno;
7848 /* Fastcall allocates the first two DWORD (SImode) or
7849 smaller arguments to ECX and EDX if the argument isn't an
7850 aggregate type. */
7851 if (cum->fastcall)
7853 if (mode == BLKmode
7854 || mode == DImode
7855 || (type && AGGREGATE_TYPE_P (type)))
7856 break;
7858 /* ECX not EAX is the first allocated register. */
7859 if (regno == AX_REG)
7860 regno = CX_REG;
7862 return gen_rtx_REG (mode, regno);
7864 break;
7866 case DFmode:
7867 if (cum->float_in_sse == -1)
7868 error_p = 1;
7869 if (cum->float_in_sse < 2)
7870 break;
7871 case SFmode:
7872 if (cum->float_in_sse == -1)
7873 error_p = 1;
7874 if (cum->float_in_sse < 1)
7875 break;
7876 /* FALLTHRU */
7877 case TImode:
7878 /* In 32bit, we pass TImode in xmm registers. */
7879 case V16QImode:
7880 case V8HImode:
7881 case V4SImode:
7882 case V2DImode:
7883 case V4SFmode:
7884 case V2DFmode:
7885 if (!type || !AGGREGATE_TYPE_P (type))
7887 if (cum->sse_nregs)
7888 return gen_reg_or_parallel (mode, orig_mode,
7889 cum->sse_regno + FIRST_SSE_REG);
7891 break;
7893 case OImode:
7894 case XImode:
7895 /* OImode and XImode shouldn't be used directly. */
7896 gcc_unreachable ();
7898 case V64QImode:
7899 case V32HImode:
7900 case V16SImode:
7901 case V8DImode:
7902 case V16SFmode:
7903 case V8DFmode:
7904 case V8SFmode:
7905 case V8SImode:
7906 case V32QImode:
7907 case V16HImode:
7908 case V4DFmode:
7909 case V4DImode:
7910 if (!type || !AGGREGATE_TYPE_P (type))
7912 if (cum->sse_nregs)
7913 return gen_reg_or_parallel (mode, orig_mode,
7914 cum->sse_regno + FIRST_SSE_REG);
7916 break;
7918 case V8QImode:
7919 case V4HImode:
7920 case V2SImode:
7921 case V2SFmode:
7922 case V1TImode:
7923 case V1DImode:
7924 if (!type || !AGGREGATE_TYPE_P (type))
7926 if (cum->mmx_nregs)
7927 return gen_reg_or_parallel (mode, orig_mode,
7928 cum->mmx_regno + FIRST_MMX_REG);
7930 break;
7932 if (error_p)
7934 cum->float_in_sse = 0;
7935 error ("calling %qD with SSE calling convention without "
7936 "SSE/SSE2 enabled", cum->decl);
7937 sorry ("this is a GCC bug that can be worked around by adding "
7938 "attribute used to function called");
7941 return NULL_RTX;
7944 static rtx
7945 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7946 machine_mode orig_mode, const_tree type, bool named)
7948 /* Handle a hidden AL argument containing number of registers
7949 for varargs x86-64 functions. */
7950 if (mode == VOIDmode)
7951 return GEN_INT (cum->maybe_vaarg
7952 ? (cum->sse_nregs < 0
7953 ? X86_64_SSE_REGPARM_MAX
7954 : cum->sse_regno)
7955 : -1);
7957 switch (mode)
7959 default:
7960 break;
7962 case V8SFmode:
7963 case V8SImode:
7964 case V32QImode:
7965 case V16HImode:
7966 case V4DFmode:
7967 case V4DImode:
7968 case V16SFmode:
7969 case V16SImode:
7970 case V64QImode:
7971 case V32HImode:
7972 case V8DFmode:
7973 case V8DImode:
7974 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7975 if (!named)
7976 return NULL;
7977 break;
7980 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7981 cum->sse_nregs,
7982 &x86_64_int_parameter_registers [cum->regno],
7983 cum->sse_regno);
7986 static rtx
7987 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7988 machine_mode orig_mode, bool named,
7989 HOST_WIDE_INT bytes)
7991 unsigned int regno;
7993 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7994 We use the value -2 to specify that the current function call uses the MS ABI. */
7995 if (mode == VOIDmode)
7996 return GEN_INT (-2);
7998 /* If we've run out of registers, it goes on the stack. */
7999 if (cum->nregs == 0)
8000 return NULL_RTX;
8002 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
8004 /* Only floating point modes are passed in anything but integer regs. */
8005 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
8007 if (named)
8008 regno = cum->regno + FIRST_SSE_REG;
8009 else
8011 rtx t1, t2;
8013 /* Unnamed floating parameters are passed in both the
8014 SSE and integer registers. */
8015 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
8016 t2 = gen_rtx_REG (mode, regno);
8017 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
8018 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
8019 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
8022 /* Handle aggregate types passed in registers. */
8023 if (orig_mode == BLKmode)
8025 if (bytes > 0 && bytes <= 8)
8026 mode = (bytes > 4 ? DImode : SImode);
8027 if (mode == BLKmode)
8028 mode = DImode;
8031 return gen_reg_or_parallel (mode, orig_mode, regno);
8034 /* Return where to put the arguments to a function.
8035 Return zero to push the argument on the stack, or a hard register in which to store the argument.
8037 MODE is the argument's machine mode. TYPE is the data type of the
8038 argument. It is null for libcalls where that information may not be
8039 available. CUM gives information about the preceding args and about
8040 the function being called. NAMED is nonzero if this argument is a
8041 named parameter (otherwise it is an extra parameter matching an
8042 ellipsis). */
8044 static rtx
8045 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
8046 const_tree type, bool named)
8048 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8049 machine_mode mode = omode;
8050 HOST_WIDE_INT bytes, words;
8051 rtx arg;
8053 /* All pointer bounds arguments are handled separately here. */
8054 if ((type && POINTER_BOUNDS_TYPE_P (type))
8055 || POINTER_BOUNDS_MODE_P (mode))
8057 /* Return NULL if bounds are forced to go in Bounds Table. */
8058 if (cum->bnds_in_bt)
8059 arg = NULL;
8060 /* Return the next available bound reg if any. */
8061 else if (cum->bnd_regno <= LAST_BND_REG)
8062 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
8063 /* Return the next special slot number otherwise. */
8064 else
8065 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
8067 return arg;
8070 if (mode == BLKmode)
8071 bytes = int_size_in_bytes (type);
8072 else
8073 bytes = GET_MODE_SIZE (mode);
8074 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8076 /* To simplify the code below, represent vector types with a vector mode
8077 even if MMX/SSE are not active. */
8078 if (type && TREE_CODE (type) == VECTOR_TYPE)
8079 mode = type_natural_mode (type, cum, false);
8081 if (TARGET_64BIT)
8083 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8085 if (call_abi == MS_ABI)
8086 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8087 else
8088 arg = function_arg_64 (cum, mode, omode, type, named);
8090 else
8091 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8093 return arg;
8096 /* Return true when an argument must be passed by
8097 reference. If true for an argument, a copy of that argument is
8098 made in memory and a pointer to the argument is passed instead of
8099 the argument itself. The pointer is passed in whatever way is
8100 appropriate for passing a pointer to that type. */
8102 static bool
8103 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8104 const_tree type, bool)
8106 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8108 /* Bounds are never passed by reference. */
8109 if ((type && POINTER_BOUNDS_TYPE_P (type))
8110 || POINTER_BOUNDS_MODE_P (mode))
8111 return false;
8113 if (TARGET_64BIT)
8115 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8117 /* See Windows x64 Software Convention. */
8118 if (call_abi == MS_ABI)
8120 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8122 if (type)
8124 /* Arrays are passed by reference. */
8125 if (TREE_CODE (type) == ARRAY_TYPE)
8126 return true;
8128 if (RECORD_OR_UNION_TYPE_P (type))
8130 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8131 are passed by reference. */
8132 msize = int_size_in_bytes (type);
8136 /* __m128 is passed by reference. */
8137 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8139 else if (type && int_size_in_bytes (type) == -1)
8140 return true;
8143 return false;
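/* A minimal illustrative sketch, not part of the back end: the Windows x64
   rule applied above — an argument whose size is not exactly 1, 2, 4 or 8
   bytes is passed by reference.  The function name is a placeholder for
   illustration only.  */
static bool
example_ms_abi_pass_by_reference (long size_in_bytes)
{
  return (size_in_bytes != 1 && size_in_bytes != 2
          && size_in_bytes != 4 && size_in_bytes != 8);
}
/* e.g. a 16-byte __m128 or a 3-byte struct goes by reference, while a
   4-byte int is passed by value.  */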
8146 /* Return true when TYPE should be 128bit aligned for 32bit argument
8147 passing ABI. XXX: This function is obsolete and is only used for
8148 checking psABI compatibility with previous versions of GCC. */
8150 static bool
8151 ix86_compat_aligned_value_p (const_tree type)
8153 machine_mode mode = TYPE_MODE (type);
8154 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8155 || mode == TDmode
8156 || mode == TFmode
8157 || mode == TCmode)
8158 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8159 return true;
8160 if (TYPE_ALIGN (type) < 128)
8161 return false;
8163 if (AGGREGATE_TYPE_P (type))
8165 /* Walk the aggregates recursively. */
8166 switch (TREE_CODE (type))
8168 case RECORD_TYPE:
8169 case UNION_TYPE:
8170 case QUAL_UNION_TYPE:
8172 tree field;
8174 /* Walk all the structure fields. */
8175 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8177 if (TREE_CODE (field) == FIELD_DECL
8178 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8179 return true;
8181 break;
8184 case ARRAY_TYPE:
8185 /* Just for use if some languages pass arrays by value. */
8186 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8187 return true;
8188 break;
8190 default:
8191 gcc_unreachable ();
8194 return false;
8197 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8198 XXX: This function is obsolete and is only used for checking psABI
8199 compatibility with previous versions of GCC. */
8201 static unsigned int
8202 ix86_compat_function_arg_boundary (machine_mode mode,
8203 const_tree type, unsigned int align)
8205 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8206 natural boundaries. */
8207 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8209 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8210 make an exception for SSE modes since these require 128bit
8211 alignment.
8213 The handling here differs from field_alignment. ICC aligns MMX
8214 arguments to 4 byte boundaries, while structure fields are aligned
8215 to 8 byte boundaries. */
8216 if (!type)
8218 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8219 align = PARM_BOUNDARY;
8221 else
8223 if (!ix86_compat_aligned_value_p (type))
8224 align = PARM_BOUNDARY;
8227 if (align > BIGGEST_ALIGNMENT)
8228 align = BIGGEST_ALIGNMENT;
8229 return align;
8232 /* Return true when TYPE should be 128bit aligned for 32bit argument
8233 passing ABI. */
8235 static bool
8236 ix86_contains_aligned_value_p (const_tree type)
8238 machine_mode mode = TYPE_MODE (type);
8240 if (mode == XFmode || mode == XCmode)
8241 return false;
8243 if (TYPE_ALIGN (type) < 128)
8244 return false;
8246 if (AGGREGATE_TYPE_P (type))
8248 /* Walk the aggregates recursively. */
8249 switch (TREE_CODE (type))
8251 case RECORD_TYPE:
8252 case UNION_TYPE:
8253 case QUAL_UNION_TYPE:
8255 tree field;
8257 /* Walk all the structure fields. */
8258 for (field = TYPE_FIELDS (type);
8259 field;
8260 field = DECL_CHAIN (field))
8262 if (TREE_CODE (field) == FIELD_DECL
8263 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8264 return true;
8266 break;
8269 case ARRAY_TYPE:
8270 /* Just for use if some languages pass arrays by value. */
8271 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8272 return true;
8273 break;
8275 default:
8276 gcc_unreachable ();
8279 else
8280 return TYPE_ALIGN (type) >= 128;
8282 return false;
8285 /* Gives the alignment boundary, in bits, of an argument with the
8286 specified mode and type. */
8288 static unsigned int
8289 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8291 unsigned int align;
8292 if (type)
8294 /* Since the main variant type is used for the call, convert TYPE
8295 to its main variant. */
8296 type = TYPE_MAIN_VARIANT (type);
8297 align = TYPE_ALIGN (type);
8299 else
8300 align = GET_MODE_ALIGNMENT (mode);
8301 if (align < PARM_BOUNDARY)
8302 align = PARM_BOUNDARY;
8303 else
8305 static bool warned;
8306 unsigned int saved_align = align;
8308 if (!TARGET_64BIT)
8310 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8311 if (!type)
8313 if (mode == XFmode || mode == XCmode)
8314 align = PARM_BOUNDARY;
8316 else if (!ix86_contains_aligned_value_p (type))
8317 align = PARM_BOUNDARY;
8319 if (align < 128)
8320 align = PARM_BOUNDARY;
8323 if (warn_psabi
8324 && !warned
8325 && align != ix86_compat_function_arg_boundary (mode, type,
8326 saved_align))
8328 warned = true;
8329 inform (input_location,
8330 "The ABI for passing parameters with %d-byte"
8331 " alignment has changed in GCC 4.6",
8332 align / BITS_PER_UNIT);
8336 return align;
8339 /* Return true if N is a possible register number of function value. */
8341 static bool
8342 ix86_function_value_regno_p (const unsigned int regno)
8344 switch (regno)
8346 case AX_REG:
8347 return true;
8348 case DX_REG:
8349 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8350 case DI_REG:
8351 case SI_REG:
8352 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8354 case BND0_REG:
8355 case BND1_REG:
8356 return chkp_function_instrumented_p (current_function_decl);
8358 /* Complex values are returned in %st(0)/%st(1) pair. */
8359 case ST0_REG:
8360 case ST1_REG:
8361 /* TODO: The function should depend on current function ABI but
8362 builtins.c would need updating then. Therefore we use the
8363 default ABI. */
8364 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8365 return false;
8366 return TARGET_FLOAT_RETURNS_IN_80387;
8368 /* Complex values are returned in %xmm0/%xmm1 pair. */
8369 case XMM0_REG:
8370 case XMM1_REG:
8371 return TARGET_SSE;
8373 case MM0_REG:
8374 if (TARGET_MACHO || TARGET_64BIT)
8375 return false;
8376 return TARGET_MMX;
8379 return false;
8382 /* Define how to find the value returned by a function.
8383 VALTYPE is the data type of the value (as a tree).
8384 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8385 otherwise, FUNC is 0. */
8387 static rtx
8388 function_value_32 (machine_mode orig_mode, machine_mode mode,
8389 const_tree fntype, const_tree fn)
8391 unsigned int regno;
8393 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8394 we normally prevent this case when mmx is not available. However
8395 some ABIs may require the result to be returned like DImode. */
8396 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8397 regno = FIRST_MMX_REG;
8399 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8400 we prevent this case when sse is not available. However some ABIs
8401 may require the result to be returned like integer TImode. */
8402 else if (mode == TImode
8403 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8404 regno = FIRST_SSE_REG;
8406 /* 32-byte vector modes in %ymm0. */
8407 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8408 regno = FIRST_SSE_REG;
8410 /* 64-byte vector modes in %zmm0. */
8411 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8412 regno = FIRST_SSE_REG;
8414 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8415 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8416 regno = FIRST_FLOAT_REG;
8417 else
8418 /* Most things go in %eax. */
8419 regno = AX_REG;
8421 /* Override FP return register with %xmm0 for local functions when
8422 SSE math is enabled or for functions with sseregparm attribute. */
8423 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8425 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8426 if (sse_level == -1)
8428 error ("calling %qD with SSE caling convention without "
8429 "SSE/SSE2 enabled", fn);
8430 sorry ("this is a GCC bug that can be worked around by adding "
8431 "attribute used to function called");
8433 else if ((sse_level >= 1 && mode == SFmode)
8434 || (sse_level == 2 && mode == DFmode))
8435 regno = FIRST_SSE_REG;
8438 /* OImode shouldn't be used directly. */
8439 gcc_assert (mode != OImode);
8441 return gen_rtx_REG (orig_mode, regno);
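/* A minimal illustrative sketch, not part of the back end: where common
   32-bit return values end up under the rules above, assuming the default
   ABI with x87 returns enabled.  The table is informal and only for
   illustration.  */
static const struct
{
  const char *type;
  const char *location;
} example_ia32_return_locations[] =
{
  { "int / pointer",  "eax" },
  { "long long",      "edx:eax" },
  { "float / double", "st(0)" },  /* unless -mno-fp-ret-in-387 */
  { "__m128",         "xmm0" },
};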
8444 static rtx
8445 function_value_64 (machine_mode orig_mode, machine_mode mode,
8446 const_tree valtype)
8448 rtx ret;
8450 /* Handle libcalls, which don't provide a type node. */
8451 if (valtype == NULL)
8453 unsigned int regno;
8455 switch (mode)
8457 case SFmode:
8458 case SCmode:
8459 case DFmode:
8460 case DCmode:
8461 case TFmode:
8462 case SDmode:
8463 case DDmode:
8464 case TDmode:
8465 regno = FIRST_SSE_REG;
8466 break;
8467 case XFmode:
8468 case XCmode:
8469 regno = FIRST_FLOAT_REG;
8470 break;
8471 case TCmode:
8472 return NULL;
8473 default:
8474 regno = AX_REG;
8477 return gen_rtx_REG (mode, regno);
8479 else if (POINTER_TYPE_P (valtype)
8480 && !upc_shared_type_p (TREE_TYPE (valtype)))
8482 /* Pointers are always returned in word_mode. */
8483 mode = word_mode;
8486 ret = construct_container (mode, orig_mode, valtype, 1,
8487 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8488 x86_64_int_return_registers, 0);
8490 /* For zero-sized structures, construct_container returns NULL, but we
8491 need to keep the rest of the compiler happy by returning a meaningful value. */
8492 if (!ret)
8493 ret = gen_rtx_REG (orig_mode, AX_REG);
8495 return ret;
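/* Illustration of the libcall path above: an SFmode/DFmode/TFmode libcall
   result comes back in %xmm0, an XFmode/XCmode result in %st(0), and the
   integer fallback uses %rax.  Typed values instead go through
   construct_container, which classifies them per the SysV AMD64 ABI.  */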
8498 static rtx
8499 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8500 const_tree valtype)
8502 unsigned int regno = AX_REG;
8504 if (TARGET_SSE)
8506 switch (GET_MODE_SIZE (mode))
8508 case 16:
8509 if (valtype != NULL_TREE
8510 && !VECTOR_INTEGER_TYPE_P (valtype)
8512 && !INTEGRAL_TYPE_P (valtype)
8513 && !VECTOR_FLOAT_TYPE_P (valtype))
8514 break;
8515 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8516 && !COMPLEX_MODE_P (mode))
8517 regno = FIRST_SSE_REG;
8518 break;
8519 case 8:
8520 case 4:
8521 if (mode == SFmode || mode == DFmode)
8522 regno = FIRST_SSE_REG;
8523 break;
8524 default:
8525 break;
8528 return gen_rtx_REG (orig_mode, regno);
8531 static rtx
8532 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8533 machine_mode orig_mode, machine_mode mode)
8535 const_tree fn, fntype;
8537 fn = NULL_TREE;
8538 if (fntype_or_decl && DECL_P (fntype_or_decl))
8539 fn = fntype_or_decl;
8540 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8542 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8543 || POINTER_BOUNDS_MODE_P (mode))
8544 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8545 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8546 return function_value_ms_64 (orig_mode, mode, valtype);
8547 else if (TARGET_64BIT)
8548 return function_value_64 (orig_mode, mode, valtype);
8549 else
8550 return function_value_32 (orig_mode, mode, fntype, fn);
8553 static rtx
8554 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8556 machine_mode mode, orig_mode;
8558 orig_mode = TYPE_MODE (valtype);
8559 mode = type_natural_mode (valtype, NULL, true);
8560 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8563 /* Return an RTX representing a place where a function returns
8564 or receives pointer bounds, or NULL if no bounds are returned.
8566 VALTYPE is a data type of a value returned by the function.
8568 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8569 or FUNCTION_TYPE of the function.
8571 If OUTGOING is false, return a place in which the caller will
8572 see the return value. Otherwise, return a place where a
8573 function returns a value. */
8575 static rtx
8576 ix86_function_value_bounds (const_tree valtype,
8577 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8578 bool outgoing ATTRIBUTE_UNUSED)
8580 rtx res = NULL_RTX;
8582 if (BOUNDED_TYPE_P (valtype))
8583 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8584 else if (chkp_type_has_pointer (valtype))
8586 bitmap slots;
8587 rtx bounds[2];
8588 bitmap_iterator bi;
8589 unsigned i, bnd_no = 0;
8591 bitmap_obstack_initialize (NULL);
8592 slots = BITMAP_ALLOC (NULL);
8593 chkp_find_bound_slots (valtype, slots);
8595 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8597 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8598 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8599 gcc_assert (bnd_no < 2);
8600 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8603 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8605 BITMAP_FREE (slots);
8606 bitmap_obstack_release (NULL);
8608 else
8609 res = NULL_RTX;
8611 return res;
8614 /* Pointer function arguments and return values are promoted to
8615 word_mode. */
8617 static machine_mode
8618 ix86_promote_function_mode (const_tree type, machine_mode mode,
8619 int *punsignedp, const_tree fntype,
8620 int for_return)
8622 if (type != NULL_TREE && POINTER_TYPE_P (type))
8624 if (upc_shared_type_p (TREE_TYPE (type)))
8626 *punsignedp = 1;
8627 return TYPE_MODE (upc_pts_rep_type_node);
8629 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8630 return word_mode;
8632 return default_promote_function_mode (type, mode, punsignedp, fntype,
8633 for_return);
8636 /* Return true if a structure, union or array with MODE containing FIELD
8637 should be accessed using BLKmode. */
8639 static bool
8640 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8642 /* Union with XFmode must be in BLKmode. */
8643 return (mode == XFmode
8644 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8645 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8649 ix86_libcall_value (machine_mode mode)
8651 return ix86_function_value_1 (NULL, NULL, mode, mode);
8654 /* Return true iff type is returned in memory. */
8656 static bool
8657 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8659 #ifdef SUBTARGET_RETURN_IN_MEMORY
8660 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8661 #else
8662 const machine_mode mode = type_natural_mode (type, NULL, true);
8663 HOST_WIDE_INT size;
8665 if (POINTER_BOUNDS_TYPE_P (type))
8666 return false;
8668 if (TARGET_64BIT)
8670 if (ix86_function_type_abi (fntype) == MS_ABI)
8672 size = int_size_in_bytes (type);
8674 /* __m128 is returned in xmm0. */
8675 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8676 || INTEGRAL_TYPE_P (type)
8677 || VECTOR_FLOAT_TYPE_P (type))
8678 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8679 && !COMPLEX_MODE_P (mode)
8680 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8681 return false;
8683 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8684 return size != 1 && size != 2 && size != 4 && size != 8;
8686 else
8688 int needed_intregs, needed_sseregs;
8690 return examine_argument (mode, type, 1,
8691 &needed_intregs, &needed_sseregs);
8694 else
8696 size = int_size_in_bytes (type);
8698 /* Intel MCU psABI returns scalars and aggregates no larger than 8
8699 bytes in registers. */
8700 if (TARGET_IAMCU)
8701 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
8703 if (mode == BLKmode)
8704 return true;
8706 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8707 return false;
8709 if (VECTOR_MODE_P (mode) || mode == TImode)
8711 /* User-created vectors small enough to fit in EAX. */
8712 if (size < 8)
8713 return false;
8715 /* Unless the ABI prescribes otherwise,
8716 MMX/3dNow values are returned in MM0 if available. */
8718 if (size == 8)
8719 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8721 /* SSE values are returned in XMM0 if available. */
8722 if (size == 16)
8723 return !TARGET_SSE;
8725 /* AVX values are returned in YMM0 if available. */
8726 if (size == 32)
8727 return !TARGET_AVX;
8729 /* AVX512F values are returned in ZMM0 if available. */
8730 if (size == 64)
8731 return !TARGET_AVX512F;
8734 if (mode == XFmode)
8735 return false;
8737 if (size > 12)
8738 return true;
8740 /* OImode shouldn't be used directly. */
8741 gcc_assert (mode != OImode);
8743 return false;
8745 #endif
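/* Example of the 32-bit rules above: a 20-byte struct is returned in
   memory, an XFmode long double is not (it uses the FP stack), and a
   16-byte vector is returned in a register only when SSE is enabled.  */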
8749 /* Create the va_list data type. */
8751 static tree
8752 ix86_build_builtin_va_list_64 (void)
8754 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8756 record = lang_hooks.types.make_type (RECORD_TYPE);
8757 type_decl = build_decl (BUILTINS_LOCATION,
8758 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8760 f_gpr = build_decl (BUILTINS_LOCATION,
8761 FIELD_DECL, get_identifier ("gp_offset"),
8762 unsigned_type_node);
8763 f_fpr = build_decl (BUILTINS_LOCATION,
8764 FIELD_DECL, get_identifier ("fp_offset"),
8765 unsigned_type_node);
8766 f_ovf = build_decl (BUILTINS_LOCATION,
8767 FIELD_DECL, get_identifier ("overflow_arg_area"),
8768 ptr_type_node);
8769 f_sav = build_decl (BUILTINS_LOCATION,
8770 FIELD_DECL, get_identifier ("reg_save_area"),
8771 ptr_type_node);
8773 va_list_gpr_counter_field = f_gpr;
8774 va_list_fpr_counter_field = f_fpr;
8776 DECL_FIELD_CONTEXT (f_gpr) = record;
8777 DECL_FIELD_CONTEXT (f_fpr) = record;
8778 DECL_FIELD_CONTEXT (f_ovf) = record;
8779 DECL_FIELD_CONTEXT (f_sav) = record;
8781 TYPE_STUB_DECL (record) = type_decl;
8782 TYPE_NAME (record) = type_decl;
8783 TYPE_FIELDS (record) = f_gpr;
8784 DECL_CHAIN (f_gpr) = f_fpr;
8785 DECL_CHAIN (f_fpr) = f_ovf;
8786 DECL_CHAIN (f_ovf) = f_sav;
8788 layout_type (record);
8790 /* The correct type is an array type of one element. */
8791 return build_array_type (record, build_index_type (size_zero_node));
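/* The record built above corresponds to the familiar SysV AMD64 va_list:
     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */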
8794 /* Setup the builtin va_list data type and for 64-bit the additional
8795 calling convention specific va_list data types. */
8797 static tree
8798 ix86_build_builtin_va_list (void)
8800 if (TARGET_64BIT)
8802 /* Initialize ABI specific va_list builtin types. */
8803 tree sysv_va_list, ms_va_list;
8805 sysv_va_list = ix86_build_builtin_va_list_64 ();
8806 sysv_va_list_type_node = build_variant_type_copy (sysv_va_list);
8808 /* For MS_ABI we use plain pointer to argument area. */
8809 ms_va_list = build_pointer_type (char_type_node);
8810 ms_va_list_type_node = build_variant_type_copy (ms_va_list);
8812 return (ix86_abi == MS_ABI) ? ms_va_list : sysv_va_list;
8814 else
8816 /* For i386 we use plain pointer to argument area. */
8817 return build_pointer_type (char_type_node);
8821 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8823 static void
8824 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8826 rtx save_area, mem;
8827 alias_set_type set;
8828 int i, max;
8830 /* GPR size of varargs save area. */
8831 if (cfun->va_list_gpr_size)
8832 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8833 else
8834 ix86_varargs_gpr_size = 0;
8836 /* FPR size of varargs save area. We don't need it if we don't pass
8837 anything in SSE registers. */
8838 if (TARGET_SSE && cfun->va_list_fpr_size)
8839 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8840 else
8841 ix86_varargs_fpr_size = 0;
8843 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8844 return;
8846 save_area = frame_pointer_rtx;
8847 set = get_varargs_alias_set ();
8849 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8850 if (max > X86_64_REGPARM_MAX)
8851 max = X86_64_REGPARM_MAX;
8853 for (i = cum->regno; i < max; i++)
8855 mem = gen_rtx_MEM (word_mode,
8856 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8857 MEM_NOTRAP_P (mem) = 1;
8858 set_mem_alias_set (mem, set);
8859 emit_move_insn (mem,
8860 gen_rtx_REG (word_mode,
8861 x86_64_int_parameter_registers[i]));
8864 if (ix86_varargs_fpr_size)
8866 machine_mode smode;
8867 rtx_code_label *label;
8868 rtx test;
8870 /* Now emit code to save SSE registers. The AX parameter contains the number
8871 of SSE parameter registers used to call this function, though all we
8872 actually check here is the zero/non-zero status. */
8874 label = gen_label_rtx ();
8875 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8876 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8877 label));
8879 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8880 we used movdqa (i.e. TImode) instead? Perhaps even better would
8881 be if we could determine the real mode of the data, via a hook
8882 into pass_stdarg. Ignore all that for now. */
8883 smode = V4SFmode;
8884 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8885 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8887 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8888 if (max > X86_64_SSE_REGPARM_MAX)
8889 max = X86_64_SSE_REGPARM_MAX;
8891 for (i = cum->sse_regno; i < max; ++i)
8893 mem = plus_constant (Pmode, save_area,
8894 i * 16 + ix86_varargs_gpr_size);
8895 mem = gen_rtx_MEM (smode, mem);
8896 MEM_NOTRAP_P (mem) = 1;
8897 set_mem_alias_set (mem, set);
8898 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8900 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8903 emit_label (label);
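/* The register save area laid out above therefore holds up to
   X86_64_REGPARM_MAX (6) general-purpose words at offsets 0..40, followed
   by up to X86_64_SSE_REGPARM_MAX (8) 16-byte SSE slots starting
   ix86_varargs_gpr_size bytes in, matching the gp_offset/fp_offset values
   that ix86_va_start stores below.  */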
8907 static void
8908 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8910 alias_set_type set = get_varargs_alias_set ();
8911 int i;
8913 /* Reset to zero, as there might be a SysV va_arg used
8914 before. */
8915 ix86_varargs_gpr_size = 0;
8916 ix86_varargs_fpr_size = 0;
8918 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8920 rtx reg, mem;
8922 mem = gen_rtx_MEM (Pmode,
8923 plus_constant (Pmode, virtual_incoming_args_rtx,
8924 i * UNITS_PER_WORD));
8925 MEM_NOTRAP_P (mem) = 1;
8926 set_mem_alias_set (mem, set);
8928 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8929 emit_move_insn (mem, reg);
8933 static void
8934 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8935 tree type, int *, int no_rtl)
8937 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8938 CUMULATIVE_ARGS next_cum;
8939 tree fntype;
8941 /* This argument doesn't appear to be used anymore, which is good,
8942 because the old code here didn't suppress rtl generation. */
8943 gcc_assert (!no_rtl);
8945 if (!TARGET_64BIT)
8946 return;
8948 fntype = TREE_TYPE (current_function_decl);
8950 /* For varargs, we do not want to skip the dummy va_dcl argument.
8951 For stdargs, we do want to skip the last named argument. */
8952 next_cum = *cum;
8953 if (stdarg_p (fntype))
8954 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8955 true);
8957 if (cum->call_abi == MS_ABI)
8958 setup_incoming_varargs_ms_64 (&next_cum);
8959 else
8960 setup_incoming_varargs_64 (&next_cum);
8963 static void
8964 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8965 enum machine_mode mode,
8966 tree type,
8967 int *pretend_size ATTRIBUTE_UNUSED,
8968 int no_rtl)
8970 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8971 CUMULATIVE_ARGS next_cum;
8972 tree fntype;
8973 rtx save_area;
8974 int bnd_reg, i, max;
8976 gcc_assert (!no_rtl);
8978 /* Do nothing if we use plain pointer to argument area. */
8979 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8980 return;
8982 fntype = TREE_TYPE (current_function_decl);
8984 /* For varargs, we do not want to skip the dummy va_dcl argument.
8985 For stdargs, we do want to skip the last named argument. */
8986 next_cum = *cum;
8987 if (stdarg_p (fntype))
8988 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8989 true);
8990 save_area = frame_pointer_rtx;
8992 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8993 if (max > X86_64_REGPARM_MAX)
8994 max = X86_64_REGPARM_MAX;
8996 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8997 if (chkp_function_instrumented_p (current_function_decl))
8998 for (i = cum->regno; i < max; i++)
9000 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
9001 rtx ptr = gen_rtx_REG (Pmode,
9002 x86_64_int_parameter_registers[i]);
9003 rtx bounds;
9005 if (bnd_reg <= LAST_BND_REG)
9006 bounds = gen_rtx_REG (BNDmode, bnd_reg);
9007 else
9009 rtx ldx_addr =
9010 plus_constant (Pmode, arg_pointer_rtx,
9011 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
9012 bounds = gen_reg_rtx (BNDmode);
9013 emit_insn (BNDmode == BND64mode
9014 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
9015 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
9018 emit_insn (BNDmode == BND64mode
9019 ? gen_bnd64_stx (addr, ptr, bounds)
9020 : gen_bnd32_stx (addr, ptr, bounds));
9022 bnd_reg++;
9027 /* Checks if TYPE is of kind va_list char *. */
9029 static bool
9030 is_va_list_char_pointer (tree type)
9032 tree canonic;
9034 /* For 32-bit it is always true. */
9035 if (!TARGET_64BIT)
9036 return true;
9037 canonic = ix86_canonical_va_list_type (type);
9038 return (canonic == ms_va_list_type_node
9039 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
9042 /* Implement va_start. */
9044 static void
9045 ix86_va_start (tree valist, rtx nextarg)
9047 HOST_WIDE_INT words, n_gpr, n_fpr;
9048 tree f_gpr, f_fpr, f_ovf, f_sav;
9049 tree gpr, fpr, ovf, sav, t;
9050 tree type;
9051 rtx ovf_rtx;
9053 if (flag_split_stack
9054 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9056 unsigned int scratch_regno;
9058 /* When we are splitting the stack, we can't refer to the stack
9059 arguments using internal_arg_pointer, because they may be on
9060 the old stack. The split stack prologue will arrange to
9061 leave a pointer to the old stack arguments in a scratch
9062 register, which we here copy to a pseudo-register. The split
9063 stack prologue can't set the pseudo-register directly because
9064 it (the prologue) runs before any registers have been saved. */
9066 scratch_regno = split_stack_prologue_scratch_regno ();
9067 if (scratch_regno != INVALID_REGNUM)
9069 rtx reg;
9070 rtx_insn *seq;
9072 reg = gen_reg_rtx (Pmode);
9073 cfun->machine->split_stack_varargs_pointer = reg;
9075 start_sequence ();
9076 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9077 seq = get_insns ();
9078 end_sequence ();
9080 push_topmost_sequence ();
9081 emit_insn_after (seq, entry_of_function ());
9082 pop_topmost_sequence ();
9086 /* Only 64bit target needs something special. */
9087 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9089 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9090 std_expand_builtin_va_start (valist, nextarg);
9091 else
9093 rtx va_r, next;
9095 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9096 next = expand_binop (ptr_mode, add_optab,
9097 cfun->machine->split_stack_varargs_pointer,
9098 crtl->args.arg_offset_rtx,
9099 NULL_RTX, 0, OPTAB_LIB_WIDEN);
9100 convert_move (va_r, next, 0);
9102 /* Store zero bounds for va_list. */
9103 if (chkp_function_instrumented_p (current_function_decl))
9104 chkp_expand_bounds_reset_for_mem (valist,
9105 make_tree (TREE_TYPE (valist),
9106 next));
9109 return;
9112 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9113 f_fpr = DECL_CHAIN (f_gpr);
9114 f_ovf = DECL_CHAIN (f_fpr);
9115 f_sav = DECL_CHAIN (f_ovf);
9117 valist = build_simple_mem_ref (valist);
9118 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9119 /* The following should be folded into the MEM_REF offset. */
9120 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9121 f_gpr, NULL_TREE);
9122 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9123 f_fpr, NULL_TREE);
9124 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9125 f_ovf, NULL_TREE);
9126 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9127 f_sav, NULL_TREE);
9129 /* Count number of gp and fp argument registers used. */
9130 words = crtl->args.info.words;
9131 n_gpr = crtl->args.info.regno;
9132 n_fpr = crtl->args.info.sse_regno;
9134 if (cfun->va_list_gpr_size)
9136 type = TREE_TYPE (gpr);
9137 t = build2 (MODIFY_EXPR, type,
9138 gpr, build_int_cst (type, n_gpr * 8));
9139 TREE_SIDE_EFFECTS (t) = 1;
9140 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9143 if (TARGET_SSE && cfun->va_list_fpr_size)
9145 type = TREE_TYPE (fpr);
9146 t = build2 (MODIFY_EXPR, type, fpr,
9147 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9148 TREE_SIDE_EFFECTS (t) = 1;
9149 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9152 /* Find the overflow area. */
9153 type = TREE_TYPE (ovf);
9154 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9155 ovf_rtx = crtl->args.internal_arg_pointer;
9156 else
9157 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9158 t = make_tree (type, ovf_rtx);
9159 if (words != 0)
9160 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9162 /* Store zero bounds for overflow area pointer. */
9163 if (chkp_function_instrumented_p (current_function_decl))
9164 chkp_expand_bounds_reset_for_mem (ovf, t);
9166 t = build2 (MODIFY_EXPR, type, ovf, t);
9167 TREE_SIDE_EFFECTS (t) = 1;
9168 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9170 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9172 /* Find the register save area.
9173 The function prologue saves it right above the stack frame. */
9174 type = TREE_TYPE (sav);
9175 t = make_tree (type, frame_pointer_rtx);
9176 if (!ix86_varargs_gpr_size)
9177 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9179 /* Store zero bounds for save area pointer. */
9180 if (chkp_function_instrumented_p (current_function_decl))
9181 chkp_expand_bounds_reset_for_mem (sav, t);
9183 t = build2 (MODIFY_EXPR, type, sav, t);
9184 TREE_SIDE_EFFECTS (t) = 1;
9185 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
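/* Worked example for the code above: in a 64-bit SysV function such as
   "int f (const char *fmt, ...)" with one named integer argument and no
   named SSE arguments, va_start leaves gp_offset = 8, fp_offset = 48
   (8 * X86_64_REGPARM_MAX), overflow_arg_area pointing at the first
   stack-passed argument, and reg_save_area pointing at the block stored
   by setup_incoming_varargs_64.  */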
9189 /* Implement va_arg. */
9191 static tree
9192 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9193 gimple_seq *post_p)
9195 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9196 tree f_gpr, f_fpr, f_ovf, f_sav;
9197 tree gpr, fpr, ovf, sav, t;
9198 int size, rsize;
9199 tree lab_false, lab_over = NULL_TREE;
9200 tree addr, t2;
9201 rtx container;
9202 int indirect_p = 0;
9203 tree ptrtype;
9204 machine_mode nat_mode;
9205 unsigned int arg_boundary;
9207 /* Only 64bit target needs something special. */
9208 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9209 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9211 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9212 f_fpr = DECL_CHAIN (f_gpr);
9213 f_ovf = DECL_CHAIN (f_fpr);
9214 f_sav = DECL_CHAIN (f_ovf);
9216 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9217 valist, f_gpr, NULL_TREE);
9219 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9220 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9221 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9223 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9224 if (indirect_p)
9225 type = build_pointer_type (type);
9226 size = int_size_in_bytes (type);
9227 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9229 nat_mode = type_natural_mode (type, NULL, false);
9230 switch (nat_mode)
9232 case V8SFmode:
9233 case V8SImode:
9234 case V32QImode:
9235 case V16HImode:
9236 case V4DFmode:
9237 case V4DImode:
9238 case V16SFmode:
9239 case V16SImode:
9240 case V64QImode:
9241 case V32HImode:
9242 case V8DFmode:
9243 case V8DImode:
9244 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
9245 if (!TARGET_64BIT_MS_ABI)
9247 container = NULL;
9248 break;
9251 default:
9252 container = construct_container (nat_mode, TYPE_MODE (type),
9253 type, 0, X86_64_REGPARM_MAX,
9254 X86_64_SSE_REGPARM_MAX, intreg,
9256 break;
9259 /* Pull the value out of the saved registers. */
9261 addr = create_tmp_var (ptr_type_node, "addr");
9263 if (container)
9265 int needed_intregs, needed_sseregs;
9266 bool need_temp;
9267 tree int_addr, sse_addr;
9269 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9270 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9272 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9274 need_temp = (!REG_P (container)
9275 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9276 || TYPE_ALIGN (type) > 128));
9278 /* In case we are passing a structure, verify that it occupies a consecutive
9279 block in the register save area. If not, we need to do moves. */
9280 if (!need_temp && !REG_P (container))
9282 /* Verify that all registers are strictly consecutive. */
9283 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9285 int i;
9287 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9289 rtx slot = XVECEXP (container, 0, i);
9290 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9291 || INTVAL (XEXP (slot, 1)) != i * 16)
9292 need_temp = true;
9295 else
9297 int i;
9299 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9301 rtx slot = XVECEXP (container, 0, i);
9302 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9303 || INTVAL (XEXP (slot, 1)) != i * 8)
9304 need_temp = true;
9308 if (!need_temp)
9310 int_addr = addr;
9311 sse_addr = addr;
9313 else
9315 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9316 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9319 /* First ensure that we fit completely in registers. */
9320 if (needed_intregs)
9322 t = build_int_cst (TREE_TYPE (gpr),
9323 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9324 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9325 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9326 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9327 gimplify_and_add (t, pre_p);
9329 if (needed_sseregs)
9331 t = build_int_cst (TREE_TYPE (fpr),
9332 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9333 + X86_64_REGPARM_MAX * 8);
9334 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9335 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9336 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9337 gimplify_and_add (t, pre_p);
9340 /* Compute index to start of area used for integer regs. */
9341 if (needed_intregs)
9343 /* int_addr = gpr + sav; */
9344 t = fold_build_pointer_plus (sav, gpr);
9345 gimplify_assign (int_addr, t, pre_p);
9347 if (needed_sseregs)
9349 /* sse_addr = fpr + sav; */
9350 t = fold_build_pointer_plus (sav, fpr);
9351 gimplify_assign (sse_addr, t, pre_p);
9353 if (need_temp)
9355 int i, prev_size = 0;
9356 tree temp = create_tmp_var (type, "va_arg_tmp");
9358 /* addr = &temp; */
9359 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9360 gimplify_assign (addr, t, pre_p);
9362 for (i = 0; i < XVECLEN (container, 0); i++)
9364 rtx slot = XVECEXP (container, 0, i);
9365 rtx reg = XEXP (slot, 0);
9366 machine_mode mode = GET_MODE (reg);
9367 tree piece_type;
9368 tree addr_type;
9369 tree daddr_type;
9370 tree src_addr, src;
9371 int src_offset;
9372 tree dest_addr, dest;
9373 int cur_size = GET_MODE_SIZE (mode);
9375 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9376 prev_size = INTVAL (XEXP (slot, 1));
9377 if (prev_size + cur_size > size)
9379 cur_size = size - prev_size;
9380 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9381 if (mode == BLKmode)
9382 mode = QImode;
9384 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9385 if (mode == GET_MODE (reg))
9386 addr_type = build_pointer_type (piece_type);
9387 else
9388 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9389 true);
9390 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9391 true);
9393 if (SSE_REGNO_P (REGNO (reg)))
9395 src_addr = sse_addr;
9396 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9398 else
9400 src_addr = int_addr;
9401 src_offset = REGNO (reg) * 8;
9403 src_addr = fold_convert (addr_type, src_addr);
9404 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9406 dest_addr = fold_convert (daddr_type, addr);
9407 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9408 if (cur_size == GET_MODE_SIZE (mode))
9410 src = build_va_arg_indirect_ref (src_addr);
9411 dest = build_va_arg_indirect_ref (dest_addr);
9413 gimplify_assign (dest, src, pre_p);
9415 else
9417 tree copy
9418 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9419 3, dest_addr, src_addr,
9420 size_int (cur_size));
9421 gimplify_and_add (copy, pre_p);
9423 prev_size += cur_size;
9427 if (needed_intregs)
9429 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9430 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9431 gimplify_assign (gpr, t, pre_p);
9434 if (needed_sseregs)
9436 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9437 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9438 gimplify_assign (unshare_expr (fpr), t, pre_p);
9441 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9443 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9446 /* ... otherwise out of the overflow area. */
9448 /* When we align a parameter on the stack for the caller, an alignment
9449 beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
9450 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
9451 caller. */
9452 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9453 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9454 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9456 /* Care for on-stack alignment if needed. */
9457 if (arg_boundary <= 64 || size == 0)
9458 t = ovf;
9459 else
9461 HOST_WIDE_INT align = arg_boundary / 8;
9462 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9463 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9464 build_int_cst (TREE_TYPE (t), -align));
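/* The two statements above round the overflow pointer up to a multiple of
   "align": e.g. with a 16-byte boundary an ovf of 0x...c8 becomes 0x...d0,
   since (0xc8 + 15) & -16 == 0xd0.  */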
9467 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9468 gimplify_assign (addr, t, pre_p);
9470 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9471 gimplify_assign (unshare_expr (ovf), t, pre_p);
9473 if (container)
9474 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9476 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9477 addr = fold_convert (ptrtype, addr);
9479 if (indirect_p)
9480 addr = build_va_arg_indirect_ref (addr);
9481 return build_va_arg_indirect_ref (addr);
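/* Example of the register-exhaustion test emitted above: fetching a single
   integer argument needs one GP register, so the branch to lab_false (the
   stack path) is taken once gp_offset >= (X86_64_REGPARM_MAX - 1 + 1) * 8
   == 48, i.e. once all six GP slots of the save area are consumed.  */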
9484 /* Return true if OPNUM's MEM should be matched
9485 in movabs* patterns. */
9487 bool
9488 ix86_check_movabs (rtx insn, int opnum)
9490 rtx set, mem;
9492 set = PATTERN (insn);
9493 if (GET_CODE (set) == PARALLEL)
9494 set = XVECEXP (set, 0, 0);
9495 gcc_assert (GET_CODE (set) == SET);
9496 mem = XEXP (set, opnum);
9497 while (SUBREG_P (mem))
9498 mem = SUBREG_REG (mem);
9499 gcc_assert (MEM_P (mem));
9500 return volatile_ok || !MEM_VOLATILE_P (mem);
9503 /* Initialize the table of extra 80387 mathematical constants. */
9505 static void
9506 init_ext_80387_constants (void)
9508 static const char * cst[5] =
9510 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9511 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9512 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9513 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9514 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9516 int i;
9518 for (i = 0; i < 5; i++)
9520 real_from_string (&ext_80387_constants_table[i], cst[i]);
9521 /* Ensure each constant is rounded to XFmode precision. */
9522 real_convert (&ext_80387_constants_table[i],
9523 XFmode, &ext_80387_constants_table[i]);
9526 ext_80387_constants_init = 1;
9529 /* Return non-zero if the constant is something that
9530 can be loaded with a special instruction. */
9532 int
9533 standard_80387_constant_p (rtx x)
9535 machine_mode mode = GET_MODE (x);
9537 REAL_VALUE_TYPE r;
9539 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
9540 return -1;
9542 if (x == CONST0_RTX (mode))
9543 return 1;
9544 if (x == CONST1_RTX (mode))
9545 return 2;
9547 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9549 /* For XFmode constants, try to find a special 80387 instruction when
9550 optimizing for size or on those CPUs that benefit from them. */
9551 if (mode == XFmode
9552 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9554 int i;
9556 if (! ext_80387_constants_init)
9557 init_ext_80387_constants ();
9559 for (i = 0; i < 5; i++)
9560 if (real_identical (&r, &ext_80387_constants_table[i]))
9561 return i + 3;
9564 /* Load of the constant -0.0 or -1.0 will be split as
9565 fldz;fchs or fld1;fchs sequence. */
9566 if (real_isnegzero (&r))
9567 return 8;
9568 if (real_identical (&r, &dconstm1))
9569 return 9;
9571 return 0;
9574 /* Return the opcode of the special instruction to be used to load
9575 the constant X. */
9577 const char *
9578 standard_80387_constant_opcode (rtx x)
9580 switch (standard_80387_constant_p (x))
9582 case 1:
9583 return "fldz";
9584 case 2:
9585 return "fld1";
9586 case 3:
9587 return "fldlg2";
9588 case 4:
9589 return "fldln2";
9590 case 5:
9591 return "fldl2e";
9592 case 6:
9593 return "fldl2t";
9594 case 7:
9595 return "fldpi";
9596 case 8:
9597 case 9:
9598 return "#";
9599 default:
9600 gcc_unreachable ();
9604 /* Return the CONST_DOUBLE representing the 80387 constant that is
9605 loaded by the specified special instruction. The argument IDX
9606 matches the return value from standard_80387_constant_p. */
9608 rtx
9609 standard_80387_constant_rtx (int idx)
9611 int i;
9613 if (! ext_80387_constants_init)
9614 init_ext_80387_constants ();
9616 switch (idx)
9618 case 3:
9619 case 4:
9620 case 5:
9621 case 6:
9622 case 7:
9623 i = idx - 3;
9624 break;
9626 default:
9627 gcc_unreachable ();
9630 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9631 XFmode);
9634 /* Return 1 if X is all 0s and 2 if X is all 1s
9635 in a supported SSE/AVX vector mode. */
9637 int
9638 standard_sse_constant_p (rtx x)
9640 machine_mode mode;
9642 if (!TARGET_SSE)
9643 return 0;
9645 mode = GET_MODE (x);
9647 if (x == const0_rtx || x == CONST0_RTX (mode))
9648 return 1;
9649 if (vector_all_ones_operand (x, mode))
9650 switch (mode)
9652 case V16QImode:
9653 case V8HImode:
9654 case V4SImode:
9655 case V2DImode:
9656 if (TARGET_SSE2)
9657 return 2;
9658 case V32QImode:
9659 case V16HImode:
9660 case V8SImode:
9661 case V4DImode:
9662 if (TARGET_AVX2)
9663 return 2;
9664 case V64QImode:
9665 case V32HImode:
9666 case V16SImode:
9667 case V8DImode:
9668 if (TARGET_AVX512F)
9669 return 2;
9670 default:
9671 break;
9674 return 0;
9677 /* Return the opcode of the special instruction to be used to load
9678 the constant X. */
9680 const char *
9681 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9683 switch (standard_sse_constant_p (x))
9685 case 1:
9686 switch (get_attr_mode (insn))
9688 case MODE_XI:
9689 return "vpxord\t%g0, %g0, %g0";
9690 case MODE_V16SF:
9691 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9692 : "vpxord\t%g0, %g0, %g0";
9693 case MODE_V8DF:
9694 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9695 : "vpxorq\t%g0, %g0, %g0";
9696 case MODE_TI:
9697 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9698 : "%vpxor\t%0, %d0";
9699 case MODE_V2DF:
9700 return "%vxorpd\t%0, %d0";
9701 case MODE_V4SF:
9702 return "%vxorps\t%0, %d0";
9704 case MODE_OI:
9705 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9706 : "vpxor\t%x0, %x0, %x0";
9707 case MODE_V4DF:
9708 return "vxorpd\t%x0, %x0, %x0";
9709 case MODE_V8SF:
9710 return "vxorps\t%x0, %x0, %x0";
9712 default:
9713 break;
9716 case 2:
9717 if (TARGET_AVX512VL
9718 || get_attr_mode (insn) == MODE_XI
9719 || get_attr_mode (insn) == MODE_V8DF
9720 || get_attr_mode (insn) == MODE_V16SF)
9721 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9722 if (TARGET_AVX)
9723 return "vpcmpeqd\t%0, %0, %0";
9724 else
9725 return "pcmpeqd\t%0, %0";
9727 default:
9728 break;
9730 gcc_unreachable ();
9733 /* Returns true if OP contains a symbol reference. */
9735 bool
9736 symbolic_reference_mentioned_p (rtx op)
9738 const char *fmt;
9739 int i;
9741 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9742 return true;
9744 fmt = GET_RTX_FORMAT (GET_CODE (op));
9745 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9747 if (fmt[i] == 'E')
9749 int j;
9751 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9752 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9753 return true;
9756 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9757 return true;
9760 return false;
9763 /* Return true if it is appropriate to emit `ret' instructions in the
9764 body of a function. Do this only if the epilogue is simple, needing a
9765 couple of insns. Prior to reloading, we can't tell how many registers
9766 must be saved, so return false then. Return false if there is no frame
9767 marker to de-allocate. */
9769 bool
9770 ix86_can_use_return_insn_p (void)
9772 struct ix86_frame frame;
9774 if (! reload_completed || frame_pointer_needed)
9775 return 0;
9777 /* Don't allow more than 32k pop, since that's all we can do
9778 with one instruction. */
9779 if (crtl->args.pops_args && crtl->args.size >= 32768)
9780 return 0;
9782 ix86_compute_frame_layout (&frame);
9783 return (frame.stack_pointer_offset == UNITS_PER_WORD
9784 && (frame.nregs + frame.nsseregs) == 0);
9787 /* Value should be nonzero if functions must have frame pointers.
9788 Zero means the frame pointer need not be set up (and parms may
9789 be accessed via the stack pointer) in functions that seem suitable. */
9791 static bool
9792 ix86_frame_pointer_required (void)
9794 /* If we accessed previous frames, then the generated code expects
9795 to be able to access the saved ebp value in our frame. */
9796 if (cfun->machine->accesses_prev_frame)
9797 return true;
9799 /* Several x86 os'es need a frame pointer for other reasons,
9800 usually pertaining to setjmp. */
9801 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9802 return true;
9804 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9805 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9806 return true;
9808 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
9809 allocation is 4GB. */
9810 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9811 return true;
9813 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9814 turns off the frame pointer by default. Turn it back on now if
9815 we've not got a leaf function. */
9816 if (TARGET_OMIT_LEAF_FRAME_POINTER
9817 && (!crtl->is_leaf
9818 || ix86_current_function_calls_tls_descriptor))
9819 return true;
9821 if (crtl->profile && !flag_fentry)
9822 return true;
9824 return false;
9827 /* Record that the current function accesses previous call frames. */
9829 void
9830 ix86_setup_frame_addresses (void)
9832 cfun->machine->accesses_prev_frame = 1;
9835 #ifndef USE_HIDDEN_LINKONCE
9836 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9837 # define USE_HIDDEN_LINKONCE 1
9838 # else
9839 # define USE_HIDDEN_LINKONCE 0
9840 # endif
9841 #endif
9843 static int pic_labels_used;
9845 /* Fills in the label name that should be used for a pc thunk for
9846 the given register. */
9848 static void
9849 get_pc_thunk_name (char name[32], unsigned int regno)
9851 gcc_assert (!TARGET_64BIT);
9853 if (USE_HIDDEN_LINKONCE)
9854 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9855 else
9856 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9860 /* This function generates code for -fpic that loads %ebx with
9861 the return address of the caller and then returns. */
9863 static void
9864 ix86_code_end (void)
9866 rtx xops[2];
9867 int regno;
9869 for (regno = AX_REG; regno <= SP_REG; regno++)
9871 char name[32];
9872 tree decl;
9874 if (!(pic_labels_used & (1 << regno)))
9875 continue;
9877 get_pc_thunk_name (name, regno);
9879 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9880 get_identifier (name),
9881 build_function_type_list (void_type_node, NULL_TREE));
9882 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9883 NULL_TREE, void_type_node);
9884 TREE_PUBLIC (decl) = 1;
9885 TREE_STATIC (decl) = 1;
9886 DECL_IGNORED_P (decl) = 1;
9888 #if TARGET_MACHO
9889 if (TARGET_MACHO)
9891 switch_to_section (darwin_sections[text_coal_section]);
9892 fputs ("\t.weak_definition\t", asm_out_file);
9893 assemble_name (asm_out_file, name);
9894 fputs ("\n\t.private_extern\t", asm_out_file);
9895 assemble_name (asm_out_file, name);
9896 putc ('\n', asm_out_file);
9897 ASM_OUTPUT_LABEL (asm_out_file, name);
9898 DECL_WEAK (decl) = 1;
9900 else
9901 #endif
9902 if (USE_HIDDEN_LINKONCE)
9904 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9906 targetm.asm_out.unique_section (decl, 0);
9907 switch_to_section (get_named_section (decl, NULL, 0));
9909 targetm.asm_out.globalize_label (asm_out_file, name);
9910 fputs ("\t.hidden\t", asm_out_file);
9911 assemble_name (asm_out_file, name);
9912 putc ('\n', asm_out_file);
9913 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9915 else
9917 switch_to_section (text_section);
9918 ASM_OUTPUT_LABEL (asm_out_file, name);
9921 DECL_INITIAL (decl) = make_node (BLOCK);
9922 current_function_decl = decl;
9923 init_function_start (decl);
9924 first_function_block_is_cold = false;
9925 /* Make sure unwind info is emitted for the thunk if needed. */
9926 final_start_function (emit_barrier (), asm_out_file, 1);
9928 /* Pad stack IP move with 4 instructions (two NOPs count
9929 as one instruction). */
9930 if (TARGET_PAD_SHORT_FUNCTION)
9932 int i = 8;
9934 while (i--)
9935 fputs ("\tnop\n", asm_out_file);
9938 xops[0] = gen_rtx_REG (Pmode, regno);
9939 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9940 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9941 output_asm_insn ("%!ret", NULL);
9942 final_end_function ();
9943 init_insn_lengths ();
9944 free_after_compilation (cfun);
9945 set_cfun (NULL);
9946 current_function_decl = NULL;
9949 if (flag_split_stack)
9950 file_end_indicate_split_stack ();
9953 /* Emit code for the SET_GOT patterns. */
9955 const char *
9956 output_set_got (rtx dest, rtx label)
9958 rtx xops[3];
9960 xops[0] = dest;
9962 if (TARGET_VXWORKS_RTP && flag_pic)
9964 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9965 xops[2] = gen_rtx_MEM (Pmode,
9966 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9967 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9969 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9970 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9971 an unadorned address. */
9972 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9973 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9974 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9975 return "";
9978 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9980 if (!flag_pic)
9982 if (TARGET_MACHO)
9983 /* We don't need a pic base, we're not producing pic. */
9984 gcc_unreachable ();
9986 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9987 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9988 targetm.asm_out.internal_label (asm_out_file, "L",
9989 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9991 else
9993 char name[32];
9994 get_pc_thunk_name (name, REGNO (dest));
9995 pic_labels_used |= 1 << REGNO (dest);
9997 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9998 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9999 output_asm_insn ("%!call\t%X2", xops);
10001 #if TARGET_MACHO
10002 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
10003 This is what will be referenced by the Mach-O PIC subsystem. */
10004 if (machopic_should_output_picbase_label () || !label)
10005 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
10007 /* When we are restoring the pic base at the site of a nonlocal label,
10008 and we decided to emit the pic base above, we will still output a
10009 local label used for calculating the correction offset (even though
10010 the offset will be 0 in that case). */
10011 if (label)
10012 targetm.asm_out.internal_label (asm_out_file, "L",
10013 CODE_LABEL_NUMBER (label));
10014 #endif
10017 if (!TARGET_MACHO)
10018 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
10020 return "";
10023 /* Generate a "push" pattern for input ARG. */
10025 static rtx
10026 gen_push (rtx arg)
10028 struct machine_function *m = cfun->machine;
10030 if (m->fs.cfa_reg == stack_pointer_rtx)
10031 m->fs.cfa_offset += UNITS_PER_WORD;
10032 m->fs.sp_offset += UNITS_PER_WORD;
10034 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10035 arg = gen_rtx_REG (word_mode, REGNO (arg));
10037 return gen_rtx_SET (gen_rtx_MEM (word_mode,
10038 gen_rtx_PRE_DEC (Pmode,
10039 stack_pointer_rtx)),
10040 arg);
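/* gen_push therefore yields RTL of the form
     (set (mem (pre_dec (reg sp))) (reg arg))
   in word_mode (DImode on 64-bit targets), which the backend's push insn
   patterns recognize; gen_pop below is the post_inc mirror image.  */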
10043 /* Generate a "pop" pattern for input ARG. */
10045 static rtx
10046 gen_pop (rtx arg)
10048 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10049 arg = gen_rtx_REG (word_mode, REGNO (arg));
10051 return gen_rtx_SET (arg,
10052 gen_rtx_MEM (word_mode,
10053 gen_rtx_POST_INC (Pmode,
10054 stack_pointer_rtx)));
10057 /* Return >= 0 if there is an unused call-clobbered register available
10058 for the entire function. */
10060 static unsigned int
10061 ix86_select_alt_pic_regnum (void)
10063 if (ix86_use_pseudo_pic_reg ())
10064 return INVALID_REGNUM;
10066 if (crtl->is_leaf
10067 && !crtl->profile
10068 && !ix86_current_function_calls_tls_descriptor)
10070 int i, drap;
10071 /* Can't use the same register for both PIC and DRAP. */
10072 if (crtl->drap_reg)
10073 drap = REGNO (crtl->drap_reg);
10074 else
10075 drap = -1;
10076 for (i = 2; i >= 0; --i)
10077 if (i != drap && !df_regs_ever_live_p (i))
10078 return i;
10081 return INVALID_REGNUM;
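/* Note that the loop above scans hard registers 2, 1, 0, i.e. %ecx, %edx
   and %eax in this port's register numbering, the call-clobbered integer
   registers that can hold the PIC base in a leaf function.  */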
10084 /* Return TRUE if we need to save REGNO. */
10086 static bool
10087 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
10089 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
10090 && pic_offset_table_rtx)
10092 if (ix86_use_pseudo_pic_reg ())
10094 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
10095 _mcount in prologue. */
10096 if (!TARGET_64BIT && flag_pic && crtl->profile)
10097 return true;
10099 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10100 || crtl->profile
10101 || crtl->calls_eh_return
10102 || crtl->uses_const_pool
10103 || cfun->has_nonlocal_label)
10104 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
10107 if (crtl->calls_eh_return && maybe_eh_return)
10109 unsigned i;
10110 for (i = 0; ; i++)
10112 unsigned test = EH_RETURN_DATA_REGNO (i);
10113 if (test == INVALID_REGNUM)
10114 break;
10115 if (test == regno)
10116 return true;
10120 if (crtl->drap_reg
10121 && regno == REGNO (crtl->drap_reg)
10122 && !cfun->machine->no_drap_save_restore)
10123 return true;
10125 return (df_regs_ever_live_p (regno)
10126 && !call_used_regs[regno]
10127 && !fixed_regs[regno]
10128 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10131 /* Return the number of saved general purpose registers. */
10133 static int
10134 ix86_nsaved_regs (void)
10136 int nregs = 0;
10137 int regno;
10139 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10140 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10141 nregs ++;
10142 return nregs;
10145 /* Return the number of saved SSE registers. */
10147 static int
10148 ix86_nsaved_sseregs (void)
10150 int nregs = 0;
10151 int regno;
10153 if (!TARGET_64BIT_MS_ABI)
10154 return 0;
10155 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10156 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10157 nregs ++;
10158 return nregs;
10161 /* Given FROM and TO register numbers, say whether this elimination is
10162 allowed. If stack alignment is needed, we can only replace argument
10163 pointer with hard frame pointer, or replace frame pointer with stack
10164 pointer. Otherwise, frame pointer elimination is automatically
10165 handled and all other eliminations are valid. */
10167 static bool
10168 ix86_can_eliminate (const int from, const int to)
10170 if (stack_realign_fp)
10171 return ((from == ARG_POINTER_REGNUM
10172 && to == HARD_FRAME_POINTER_REGNUM)
10173 || (from == FRAME_POINTER_REGNUM
10174 && to == STACK_POINTER_REGNUM));
10175 else
10176 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10179 /* Return the offset between two registers, one to be eliminated, and the other
10180 its replacement, at the start of a routine. */
10182 HOST_WIDE_INT
10183 ix86_initial_elimination_offset (int from, int to)
10185 struct ix86_frame frame;
10186 ix86_compute_frame_layout (&frame);
10188 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10189 return frame.hard_frame_pointer_offset;
10190 else if (from == FRAME_POINTER_REGNUM
10191 && to == HARD_FRAME_POINTER_REGNUM)
10192 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10193 else
10195 gcc_assert (to == STACK_POINTER_REGNUM);
10197 if (from == ARG_POINTER_REGNUM)
10198 return frame.stack_pointer_offset;
10200 gcc_assert (from == FRAME_POINTER_REGNUM);
10201 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10205 /* In a dynamically-aligned function, we can't know the offset from
10206 stack pointer to frame pointer, so we must ensure that setjmp
10207 eliminates fp against the hard fp (%ebp) rather than trying to
10208 index from %esp up to the top of the frame across a gap that is
10209 of unknown (at compile-time) size. */
10210 static rtx
10211 ix86_builtin_setjmp_frame_value (void)
10213 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10216 /* When using -fsplit-stack, the allocation routines set a field in
10217 the TCB to the bottom of the stack plus this much space, measured
10218 in bytes. */
10220 #define SPLIT_STACK_AVAILABLE 256
10222 /* Fill in the ix86_frame structure FRAME describing the frame of the currently compiled function. */
10224 static void
10225 ix86_compute_frame_layout (struct ix86_frame *frame)
10227 unsigned HOST_WIDE_INT stack_alignment_needed;
10228 HOST_WIDE_INT offset;
10229 unsigned HOST_WIDE_INT preferred_alignment;
10230 HOST_WIDE_INT size = get_frame_size ();
10231 HOST_WIDE_INT to_allocate;
10233 frame->nregs = ix86_nsaved_regs ();
10234 frame->nsseregs = ix86_nsaved_sseregs ();
10236 /* The 64-bit MS ABI seems to require stack alignment to be always 16 except
10237 for function prologues and leaf functions. */
10238 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10239 && (!crtl->is_leaf || cfun->calls_alloca != 0
10240 || ix86_current_function_calls_tls_descriptor))
10242 crtl->preferred_stack_boundary = 128;
10243 crtl->stack_alignment_needed = 128;
10245 /* preferred_stack_boundary is never updated for calls
10246 expanded from a TLS descriptor. Update it here. We don't update it at
10247 expand time because, according to the comments before
10248 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
10249 away. */
10250 else if (ix86_current_function_calls_tls_descriptor
10251 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10253 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10254 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10255 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10258 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10259 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10261 gcc_assert (!size || stack_alignment_needed);
10262 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10263 gcc_assert (preferred_alignment <= stack_alignment_needed);
10265 /* For SEH we have to limit the amount of code movement into the prologue.
10266 At present we do this via a BLOCKAGE, at which point there's very little
10267 scheduling that can be done, which means that there's very little point
10268 in doing anything except PUSHs. */
10269 if (TARGET_SEH)
10270 cfun->machine->use_fast_prologue_epilogue = false;
10272 /* During reload iteration the number of registers saved can change.
10273 Recompute the value as needed. Do not recompute when the number of
10274 registers didn't change, as reload makes multiple calls to this function
10275 and does not expect the decision to change within a single iteration. */
10276 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10277 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10279 int count = frame->nregs;
10280 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10282 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10284 /* The fast prologue uses move instead of push to save registers. This
10285 is significantly longer, but also executes faster as modern hardware
10286 can execute the moves in parallel, but can't do that for push/pop.
10288 Be careful about choosing which prologue to emit: when the function takes
10289 many instructions to execute we may want to use the slow version, as well
10290 as when the function is known to be outside a hot spot (this is known from
10291 profile feedback only). Weight the size of the function by the number of
10292 registers to save, as it is cheap to use one or two push instructions but
10293 very slow to use many of them. */
10294 if (count)
10295 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10296 if (node->frequency < NODE_FREQUENCY_NORMAL
10297 || (flag_branch_probabilities
10298 && node->frequency < NODE_FREQUENCY_HOT))
10299 cfun->machine->use_fast_prologue_epilogue = false;
10300 else
10301 cfun->machine->use_fast_prologue_epilogue
10302 = !expensive_function_p (count);
10305 frame->save_regs_using_mov
10306 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10307 /* If static stack checking is enabled and done with probes,
10308 the registers need to be saved before allocating the frame. */
10309 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10311 /* Skip return address. */
10312 offset = UNITS_PER_WORD;
10314 /* Skip pushed static chain. */
10315 if (ix86_static_chain_on_stack)
10316 offset += UNITS_PER_WORD;
10318 /* Skip saved base pointer. */
10319 if (frame_pointer_needed)
10320 offset += UNITS_PER_WORD;
10321 frame->hfp_save_offset = offset;
10323 /* The traditional frame pointer location is at the top of the frame. */
10324 frame->hard_frame_pointer_offset = offset;
10326 /* Register save area */
10327 offset += frame->nregs * UNITS_PER_WORD;
10328 frame->reg_save_offset = offset;
10330 /* On SEH target, registers are pushed just before the frame pointer
10331 location. */
10332 if (TARGET_SEH)
10333 frame->hard_frame_pointer_offset = offset;
10335 /* Align and set SSE register save area. */
10336 if (frame->nsseregs)
10338 /* The only ABI that has saved SSE registers (Win64) also has a
10339 16-byte aligned default stack, and thus we don't need to be
10340 within the re-aligned local stack frame to save them. */
10341 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10342 offset = (offset + 16 - 1) & -16;
10343 offset += frame->nsseregs * 16;
10345 frame->sse_reg_save_offset = offset;
10347 /* The re-aligned stack starts here. Values before this point are not
10348 directly comparable with values below this point. In order to make
10349 sure that no value happens to be the same before and after, force
10350 the alignment computation below to add a non-zero value. */
10351 if (stack_realign_fp)
10352 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10354 /* Va-arg area */
10355 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10356 offset += frame->va_arg_size;
10358 /* Align start of frame for local function. */
10359 if (stack_realign_fp
10360 || offset != frame->sse_reg_save_offset
10361 || size != 0
10362 || !crtl->is_leaf
10363 || cfun->calls_alloca
10364 || ix86_current_function_calls_tls_descriptor)
10365 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10367 /* Frame pointer points here. */
10368 frame->frame_pointer_offset = offset;
10370 offset += size;
10372 /* Add outgoing arguments area. Can be skipped if we eliminated
10373 all the function calls as dead code.
10374 Skipping is however impossible when function calls alloca. Alloca
10375 expander assumes that last crtl->outgoing_args_size
10376 of stack frame are unused. */
10377 if (ACCUMULATE_OUTGOING_ARGS
10378 && (!crtl->is_leaf || cfun->calls_alloca
10379 || ix86_current_function_calls_tls_descriptor))
10381 offset += crtl->outgoing_args_size;
10382 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10384 else
10385 frame->outgoing_arguments_size = 0;
10387 /* Align stack boundary. Only needed if we're calling another function
10388 or using alloca. */
10389 if (!crtl->is_leaf || cfun->calls_alloca
10390 || ix86_current_function_calls_tls_descriptor)
10391 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10393 /* We've reached end of stack frame. */
10394 frame->stack_pointer_offset = offset;
10396 /* Size prologue needs to allocate. */
10397 to_allocate = offset - frame->sse_reg_save_offset;
10399 if ((!to_allocate && frame->nregs <= 1)
10400 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10401 frame->save_regs_using_mov = false;
10403 if (ix86_using_red_zone ()
10404 && crtl->sp_is_unchanging
10405 && crtl->is_leaf
10406 && !ix86_current_function_calls_tls_descriptor)
10408 frame->red_zone_size = to_allocate;
10409 if (frame->save_regs_using_mov)
10410 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10411 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10412 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10414 else
10415 frame->red_zone_size = 0;
10416 frame->stack_pointer_offset -= frame->red_zone_size;
10418 /* The SEH frame pointer location is near the bottom of the frame.
10419 This is enforced by the fact that the difference between the
10420 stack pointer and the frame pointer is limited to 240 bytes in
10421 the unwind data structure. */
10422 if (TARGET_SEH)
10424 HOST_WIDE_INT diff;
10426 /* If we can leave the frame pointer where it is, do so. Also, this returns
10427 the establisher frame for __builtin_frame_address (0). */
10428 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10429 if (diff <= SEH_MAX_FRAME_SIZE
10430 && (diff > 240 || (diff & 15) != 0)
10431 && !crtl->accesses_prior_frames)
10433 /* Ideally we'd determine what portion of the local stack frame
10434 (within the constraint of the lowest 240) is most heavily used.
10435 But without that complication, simply bias the frame pointer
10436 by 128 bytes so as to maximize the amount of the local stack
10437 frame that is addressable with 8-bit offsets. */
10438 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
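/* With that bias the signed 8-bit displacement range [-128, +127] from the
   frame pointer covers roughly the 256 bytes of the frame closest to the
   stack pointer, typically the most frequently addressed part.  */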
10443 /* This is semi-inlined memory_address_length, but simplified
10444 since we know that we're always dealing with reg+offset, and
10445 to avoid having to create and discard all that rtl. */
10447 static inline int
10448 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10450 int len = 4;
10452 if (offset == 0)
10454 /* EBP and R13 cannot be encoded without an offset. */
10455 len = (regno == BP_REG || regno == R13_REG);
10457 else if (IN_RANGE (offset, -128, 127))
10458 len = 1;
10460 /* ESP and R12 must be encoded with a SIB byte. */
10461 if (regno == SP_REG || regno == R12_REG)
10462 len++;
10464 return len;
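/* For example: (%rax) needs no extra bytes; 0(%rbp) needs a one-byte
   displacement, because EBP/R13 cannot be encoded without one; -8(%rsp)
   needs a disp8 plus a SIB byte, i.e. 2; and 256(%rax) needs a four-byte
   displacement.  */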
10467 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10468 The valid base registers are taken from CFUN->MACHINE->FS. */
10470 static rtx
10471 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10473 const struct machine_function *m = cfun->machine;
10474 rtx base_reg = NULL;
10475 HOST_WIDE_INT base_offset = 0;
10477 if (m->use_fast_prologue_epilogue)
10479 /* Choose the base register most likely to allow the most scheduling
10480 opportunities. Generally FP is valid throughout the function,
10481 while DRAP must be reloaded within the epilogue. But choose either
10482 over the SP due to increased encoding size. */
10484 if (m->fs.fp_valid)
10486 base_reg = hard_frame_pointer_rtx;
10487 base_offset = m->fs.fp_offset - cfa_offset;
10489 else if (m->fs.drap_valid)
10491 base_reg = crtl->drap_reg;
10492 base_offset = 0 - cfa_offset;
10494 else if (m->fs.sp_valid)
10496 base_reg = stack_pointer_rtx;
10497 base_offset = m->fs.sp_offset - cfa_offset;
10500 else
10502 HOST_WIDE_INT toffset;
10503 int len = 16, tlen;
10505 /* Choose the base register with the smallest address encoding.
10506 With a tie, choose FP > DRAP > SP. */
10507 if (m->fs.sp_valid)
10509 base_reg = stack_pointer_rtx;
10510 base_offset = m->fs.sp_offset - cfa_offset;
10511 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10513 if (m->fs.drap_valid)
10515 toffset = 0 - cfa_offset;
10516 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10517 if (tlen <= len)
10519 base_reg = crtl->drap_reg;
10520 base_offset = toffset;
10521 len = tlen;
10524 if (m->fs.fp_valid)
10526 toffset = m->fs.fp_offset - cfa_offset;
10527 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10528 if (tlen <= len)
10530 base_reg = hard_frame_pointer_rtx;
10531 base_offset = toffset;
10532 len = tlen;
10536 gcc_assert (base_reg != NULL);
10538 return plus_constant (Pmode, base_reg, base_offset);
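/* For instance, if the frame pointer is valid with fs.fp_offset of 16 and
   CFA_OFFSET is 24, the result is the address hard_frame_pointer - 8, i.e.
   the slot 8 bytes below the frame pointer.  */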
10541 /* Emit code to save registers in the prologue. */
10543 static void
10544 ix86_emit_save_regs (void)
10546 unsigned int regno;
10547 rtx_insn *insn;
10549 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10550 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10552 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10553 RTX_FRAME_RELATED_P (insn) = 1;
10557 /* Emit a single register save at CFA - CFA_OFFSET. */
10559 static void
10560 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10561 HOST_WIDE_INT cfa_offset)
10563 struct machine_function *m = cfun->machine;
10564 rtx reg = gen_rtx_REG (mode, regno);
10565 rtx mem, addr, base, insn;
10567 addr = choose_baseaddr (cfa_offset);
10568 mem = gen_frame_mem (mode, addr);
10570 /* For SSE saves, we need to indicate the 128-bit alignment. */
10571 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10573 insn = emit_move_insn (mem, reg);
10574 RTX_FRAME_RELATED_P (insn) = 1;
10576 base = addr;
10577 if (GET_CODE (base) == PLUS)
10578 base = XEXP (base, 0);
10579 gcc_checking_assert (REG_P (base));
10581 /* When saving registers into a re-aligned local stack frame, avoid
10582 any tricky guessing by dwarf2out. */
10583 if (m->fs.realigned)
10585 gcc_checking_assert (stack_realign_drap);
10587 if (regno == REGNO (crtl->drap_reg))
10589 /* A bit of a hack. We force the DRAP register to be saved in
10590 the re-aligned stack frame, which provides us with a copy
10591 of the CFA that will last past the prologue. Install it. */
10592 gcc_checking_assert (cfun->machine->fs.fp_valid);
10593 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10594 cfun->machine->fs.fp_offset - cfa_offset);
10595 mem = gen_rtx_MEM (mode, addr);
10596 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10598 else
10600 /* The frame pointer is a stable reference within the
10601 aligned frame. Use it. */
10602 gcc_checking_assert (cfun->machine->fs.fp_valid);
10603 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10604 cfun->machine->fs.fp_offset - cfa_offset);
10605 mem = gen_rtx_MEM (mode, addr);
10606 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
10610 /* The memory may not be relative to the current CFA register,
10611 which means that we may need to generate a new pattern for
10612 use by the unwind info. */
10613 else if (base != m->fs.cfa_reg)
10615 addr = plus_constant (Pmode, m->fs.cfa_reg,
10616 m->fs.cfa_offset - cfa_offset);
10617 mem = gen_rtx_MEM (mode, addr);
10618 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
10622 /* Emit code to save registers using MOV insns.
10623 First register is stored at CFA - CFA_OFFSET. */
10624 static void
10625 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10627 unsigned int regno;
10629 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10630 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10632 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10633 cfa_offset -= UNITS_PER_WORD;
10637 /* Emit code to save SSE registers using MOV insns.
10638 First register is stored at CFA - CFA_OFFSET. */
10639 static void
10640 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10642 unsigned int regno;
10644 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10645 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10647 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10648 cfa_offset -= 16;
10652 static GTY(()) rtx queued_cfa_restores;
10654 /* Add a REG_CFA_RESTORE REG note to INSN or queue it until the next stack
10655 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10656 Don't add the note if the previously saved value will be left untouched
10657 within the stack red zone until return, as unwinders can find the same value
10658 in the register and on the stack. */
10660 static void
10661 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
10663 if (!crtl->shrink_wrapped
10664 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10665 return;
10667 if (insn)
10669 add_reg_note (insn, REG_CFA_RESTORE, reg);
10670 RTX_FRAME_RELATED_P (insn) = 1;
10672 else
10673 queued_cfa_restores
10674 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10677 /* Add queued REG_CFA_RESTORE notes, if any, to INSN. */
10679 static void
10680 ix86_add_queued_cfa_restore_notes (rtx insn)
10682 rtx last;
10683 if (!queued_cfa_restores)
10684 return;
10685 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10687 XEXP (last, 1) = REG_NOTES (insn);
10688 REG_NOTES (insn) = queued_cfa_restores;
10689 queued_cfa_restores = NULL_RTX;
10690 RTX_FRAME_RELATED_P (insn) = 1;
10693 /* Expand prologue or epilogue stack adjustment.
10694 The pattern exists to put a dependency on all ebp-based memory accesses.
10695 STYLE should be negative if instructions should be marked as frame related,
10696 zero if %r11 register is live and cannot be freely used and positive
10697 otherwise. */
10699 static void
10700 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10701 int style, bool set_cfa)
10703 struct machine_function *m = cfun->machine;
10704 rtx insn;
10705 bool add_frame_related_expr = false;
10707 if (Pmode == SImode)
10708 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10709 else if (x86_64_immediate_operand (offset, DImode))
10710 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10711 else
10713 rtx tmp;
10714 /* r11 is used by indirect sibcall return as well, set before the
10715 epilogue and used after the epilogue. */
10716 if (style)
10717 tmp = gen_rtx_REG (DImode, R11_REG);
10718 else
10720 gcc_assert (src != hard_frame_pointer_rtx
10721 && dest != hard_frame_pointer_rtx);
10722 tmp = hard_frame_pointer_rtx;
10724 insn = emit_insn (gen_rtx_SET (tmp, offset));
10725 if (style < 0)
10726 add_frame_related_expr = true;
10728 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10731 insn = emit_insn (insn);
10732 if (style >= 0)
10733 ix86_add_queued_cfa_restore_notes (insn);
10735 if (set_cfa)
10737 rtx r;
10739 gcc_assert (m->fs.cfa_reg == src);
10740 m->fs.cfa_offset += INTVAL (offset);
10741 m->fs.cfa_reg = dest;
10743 r = gen_rtx_PLUS (Pmode, src, offset);
10744 r = gen_rtx_SET (dest, r);
10745 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10746 RTX_FRAME_RELATED_P (insn) = 1;
10748 else if (style < 0)
10750 RTX_FRAME_RELATED_P (insn) = 1;
10751 if (add_frame_related_expr)
10753 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10754 r = gen_rtx_SET (dest, r);
10755 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10759 if (dest == stack_pointer_rtx)
10761 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10762 bool valid = m->fs.sp_valid;
10764 if (src == hard_frame_pointer_rtx)
10766 valid = m->fs.fp_valid;
10767 ooffset = m->fs.fp_offset;
10769 else if (src == crtl->drap_reg)
10771 valid = m->fs.drap_valid;
10772 ooffset = 0;
10774 else
10776 /* Else there are two possibilities: SP itself, which we set
10777 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10778 taken care of by hand along the eh_return path. */
10779 gcc_checking_assert (src == stack_pointer_rtx
10780 || offset == const0_rtx);
10783 m->fs.sp_offset = ooffset - INTVAL (offset);
10784 m->fs.sp_valid = valid;
10788 /* Find an available register to be used as dynamic realign argument
10789 pointer register. Such a register will be written in the prologue and
10790 used at the beginning of the body, so it must not be
10791 1. a parameter-passing register, or
10792 2. the GOT pointer.
10793 We reuse the static-chain register if it is available. Otherwise, we
10794 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10795 shorter encoding.
10797 Return: the regno of chosen register. */
10799 static unsigned int
10800 find_drap_reg (void)
10802 tree decl = cfun->decl;
10804 if (TARGET_64BIT)
10806 /* Use R13 for a nested function or a function that needs a static chain.
10807 Since a function with a tail call may use any caller-saved
10808 register in the epilogue, DRAP must not use a caller-saved
10809 register in such a case.
10810 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10811 return R13_REG;
10813 return R10_REG;
10815 else
10817 /* Use DI for a nested function or a function that needs a static chain.
10818 Since a function with a tail call may use any caller-saved
10819 register in the epilogue, DRAP must not use a caller-saved
10820 register in such a case.
10821 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10822 return DI_REG;
10824 /* Reuse static chain register if it isn't used for parameter
10825 passing. */
10826 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10828 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10829 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10830 return CX_REG;
10832 return DI_REG;
10836 /* Return minimum incoming stack alignment. */
10838 static unsigned int
10839 ix86_minimum_incoming_stack_boundary (bool sibcall)
10841 unsigned int incoming_stack_boundary;
10843 /* Prefer the one specified at command line. */
10844 if (ix86_user_incoming_stack_boundary)
10845 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10846 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10847 if -mstackrealign is used, this isn't the sibcall check, and the
10848 estimated stack alignment is 128 bits. */
10849 else if (!sibcall
10850 && !TARGET_64BIT
10851 && ix86_force_align_arg_pointer
10852 && crtl->stack_alignment_estimated == 128)
10853 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10854 else
10855 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10857 /* Incoming stack alignment can be changed on individual functions
10858 via force_align_arg_pointer attribute. We use the smallest
10859 incoming stack boundary. */
10860 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10861 && lookup_attribute (ix86_force_align_arg_pointer_string,
10862 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10863 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10865 /* The incoming stack frame has to be aligned at least at
10866 parm_stack_boundary. */
10867 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10868 incoming_stack_boundary = crtl->parm_stack_boundary;
10870 /* The stack at the entry of main is aligned by the runtime. We use the
10871 smallest incoming stack boundary. */
10872 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10873 && DECL_NAME (current_function_decl)
10874 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10875 && DECL_FILE_SCOPE_P (current_function_decl))
10876 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10878 return incoming_stack_boundary;
10881 /* Update incoming stack boundary and estimated stack alignment. */
10883 static void
10884 ix86_update_stack_boundary (void)
10886 ix86_incoming_stack_boundary
10887 = ix86_minimum_incoming_stack_boundary (false);
10889 /* x86_64 varargs functions need 16-byte stack alignment for the register
10890 save area. */
10891 if (TARGET_64BIT
10892 && cfun->stdarg
10893 && crtl->stack_alignment_estimated < 128)
10894 crtl->stack_alignment_estimated = 128;
10897 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10898 needed or an rtx for DRAP otherwise. */
10900 static rtx
10901 ix86_get_drap_rtx (void)
10903 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10904 crtl->need_drap = true;
10906 if (stack_realign_drap)
10908 /* Assign DRAP to vDRAP and return vDRAP. */
10909 unsigned int regno = find_drap_reg ();
10910 rtx drap_vreg;
10911 rtx arg_ptr;
10912 rtx_insn *seq, *insn;
10914 arg_ptr = gen_rtx_REG (Pmode, regno);
10915 crtl->drap_reg = arg_ptr;
10917 start_sequence ();
10918 drap_vreg = copy_to_reg (arg_ptr);
10919 seq = get_insns ();
10920 end_sequence ();
10922 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10923 if (!optimize)
10925 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10926 RTX_FRAME_RELATED_P (insn) = 1;
10928 return drap_vreg;
10930 else
10931 return NULL;
10934 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10936 static rtx
10937 ix86_internal_arg_pointer (void)
10939 return virtual_incoming_args_rtx;
10942 struct scratch_reg {
10943 rtx reg;
10944 bool saved;
10947 /* Return a short-lived scratch register for use on function entry.
10948 In 32-bit mode, it is valid only after the registers are saved
10949 in the prologue. This register must be released by means of
10950 release_scratch_register_on_entry once it is dead. */
10952 static void
10953 get_scratch_register_on_entry (struct scratch_reg *sr)
10955 int regno;
10957 sr->saved = false;
10959 if (TARGET_64BIT)
10961 /* We always use R11 in 64-bit mode. */
10962 regno = R11_REG;
10964 else
10966 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10967 bool fastcall_p
10968 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10969 bool thiscall_p
10970 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10971 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10972 int regparm = ix86_function_regparm (fntype, decl);
10973 int drap_regno
10974 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10976 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10977 for the static chain register. */
10978 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10979 && drap_regno != AX_REG)
10980 regno = AX_REG;
10981 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10982 for the static chain register. */
10983 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10984 regno = AX_REG;
10985 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10986 regno = DX_REG;
10987 /* ecx is the static chain register. */
10988 else if (regparm < 3 && !fastcall_p && !thiscall_p
10989 && !static_chain_p
10990 && drap_regno != CX_REG)
10991 regno = CX_REG;
10992 else if (ix86_save_reg (BX_REG, true))
10993 regno = BX_REG;
10994 /* esi is the static chain register. */
10995 else if (!(regparm == 3 && static_chain_p)
10996 && ix86_save_reg (SI_REG, true))
10997 regno = SI_REG;
10998 else if (ix86_save_reg (DI_REG, true))
10999 regno = DI_REG;
11000 else
11002 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
11003 sr->saved = true;
11007 sr->reg = gen_rtx_REG (Pmode, regno);
11008 if (sr->saved)
11010 rtx_insn *insn = emit_insn (gen_push (sr->reg));
11011 RTX_FRAME_RELATED_P (insn) = 1;
11015 /* Release a scratch register obtained from the preceding function. */
11017 static void
11018 release_scratch_register_on_entry (struct scratch_reg *sr)
11020 if (sr->saved)
11022 struct machine_function *m = cfun->machine;
11023 rtx x, insn = emit_insn (gen_pop (sr->reg));
11025 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
11026 RTX_FRAME_RELATED_P (insn) = 1;
11027 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
11028 x = gen_rtx_SET (stack_pointer_rtx, x);
11029 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
11030 m->fs.sp_offset -= UNITS_PER_WORD;
11034 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
11036 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
11038 static void
11039 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
11041 /* We skip the probe for the first interval + a small dope of 4 words and
11042 probe that many bytes past the specified size to maintain a protection
11043 area at the bottom of the stack. */
11044 const int dope = 4 * UNITS_PER_WORD;
11045 rtx size_rtx = GEN_INT (size), last;
11047 /* See if we have a constant small number of probes to generate. If so,
11048 that's the easy case. The run-time loop is made up of 11 insns in the
11049 generic case while the compile-time loop is made up of 3+2*(n-1) insns
11050 for n # of intervals. */
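/* For instance, with the usual 4096-byte PROBE_INTERVAL a 12 KB allocation
   needs three intervals and therefore 3 + 2*(3-1) = 7 unrolled insns; at
   five intervals the unrolled form reaches 11 insns, matching the run-time
   loop, which is why larger sizes fall through to the loop below.  */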
11051 if (size <= 5 * PROBE_INTERVAL)
11053 HOST_WIDE_INT i, adjust;
11054 bool first_probe = true;
11056 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
11057 values of N from 1 until it exceeds SIZE. If only one probe is
11058 needed, this will not generate any code. Then adjust and probe
11059 to PROBE_INTERVAL + SIZE. */
11060 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11062 if (first_probe)
11064 adjust = 2 * PROBE_INTERVAL + dope;
11065 first_probe = false;
11067 else
11068 adjust = PROBE_INTERVAL;
11070 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11071 plus_constant (Pmode, stack_pointer_rtx,
11072 -adjust)));
11073 emit_stack_probe (stack_pointer_rtx);
11076 if (first_probe)
11077 adjust = size + PROBE_INTERVAL + dope;
11078 else
11079 adjust = size + PROBE_INTERVAL - i;
11081 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11082 plus_constant (Pmode, stack_pointer_rtx,
11083 -adjust)));
11084 emit_stack_probe (stack_pointer_rtx);
11086 /* Adjust back to account for the additional first interval. */
11087 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11088 plus_constant (Pmode, stack_pointer_rtx,
11089 PROBE_INTERVAL + dope)));
11092 /* Otherwise, do the same as above, but in a loop. Note that we must be
11093 extra careful with variables wrapping around because we might be at
11094 the very top (or the very bottom) of the address space and we have
11095 to be able to handle this case properly; in particular, we use an
11096 equality test for the loop condition. */
11097 else
11099 HOST_WIDE_INT rounded_size;
11100 struct scratch_reg sr;
11102 get_scratch_register_on_entry (&sr);
11105 /* Step 1: round SIZE to the previous multiple of the interval. */
11107 rounded_size = size & -PROBE_INTERVAL;
11110 /* Step 2: compute initial and final value of the loop counter. */
11112 /* SP = SP_0 + PROBE_INTERVAL. */
11113 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11114 plus_constant (Pmode, stack_pointer_rtx,
11115 - (PROBE_INTERVAL + dope))));
11117 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
11118 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
11119 emit_insn (gen_rtx_SET (sr.reg,
11120 gen_rtx_PLUS (Pmode, sr.reg,
11121 stack_pointer_rtx)));
11124 /* Step 3: the loop
11126 while (SP != LAST_ADDR)
11128 SP = SP + PROBE_INTERVAL
11129 probe at SP
11132 adjusts SP and probes at PROBE_INTERVAL + N * PROBE_INTERVAL for
11133 values of N from 1 until it is equal to ROUNDED_SIZE. */
11135 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
11138 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
11139 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
11141 if (size != rounded_size)
11143 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11144 plus_constant (Pmode, stack_pointer_rtx,
11145 rounded_size - size)));
11146 emit_stack_probe (stack_pointer_rtx);
11149 /* Adjust back to account for the additional first interval. */
11150 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11151 plus_constant (Pmode, stack_pointer_rtx,
11152 PROBE_INTERVAL + dope)));
11154 release_scratch_register_on_entry (&sr);
11157 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
11159 /* Even if the stack pointer isn't the CFA register, we need to correctly
11160 describe the adjustments made to it, in particular differentiate the
11161 frame-related ones from the frame-unrelated ones. */
11162 if (size > 0)
11164 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11165 XVECEXP (expr, 0, 0)
11166 = gen_rtx_SET (stack_pointer_rtx,
11167 plus_constant (Pmode, stack_pointer_rtx, -size));
11168 XVECEXP (expr, 0, 1)
11169 = gen_rtx_SET (stack_pointer_rtx,
11170 plus_constant (Pmode, stack_pointer_rtx,
11171 PROBE_INTERVAL + dope + size));
11172 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11173 RTX_FRAME_RELATED_P (last) = 1;
11175 cfun->machine->fs.sp_offset += size;
11178 /* Make sure nothing is scheduled before we are done. */
11179 emit_insn (gen_blockage ());
11182 /* Adjust the stack pointer up to REG while probing it. */
11184 const char *
11185 output_adjust_stack_and_probe (rtx reg)
11187 static int labelno = 0;
11188 char loop_lab[32], end_lab[32];
11189 rtx xops[2];
11191 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11192 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11194 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11196 /* Jump to END_LAB if SP == LAST_ADDR. */
11197 xops[0] = stack_pointer_rtx;
11198 xops[1] = reg;
11199 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11200 fputs ("\tje\t", asm_out_file);
11201 assemble_name_raw (asm_out_file, end_lab);
11202 fputc ('\n', asm_out_file);
11204 /* SP = SP + PROBE_INTERVAL. */
11205 xops[1] = GEN_INT (PROBE_INTERVAL);
11206 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11208 /* Probe at SP. */
11209 xops[1] = const0_rtx;
11210 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11212 fprintf (asm_out_file, "\tjmp\t");
11213 assemble_name_raw (asm_out_file, loop_lab);
11214 fputc ('\n', asm_out_file);
11216 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11218 return "";
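/* With a 4 KB probe interval and %r11 as the scratch register, the emitted
   64-bit AT&T sequence looks roughly like:
       .LPSRL0: cmpq %r11, %rsp
                je   .LPSRE0
                subq $4096, %rsp
                orq  $0, (%rsp)
                jmp  .LPSRL0
       .LPSRE0:  */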
11221 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11222 inclusive. These are offsets from the current stack pointer. */
11224 static void
11225 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11227 /* See if we have a constant small number of probes to generate. If so,
11228 that's the easy case. The run-time loop is made up of 7 insns in the
11229 generic case while the compile-time loop is made up of n insns for n #
11230 of intervals. */
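/* E.g. seven 4 KB intervals (28 KB) unroll into at most seven probe insns,
   no worse than the seven-insn run-time loop, hence the cutoff below.  */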
11231 if (size <= 7 * PROBE_INTERVAL)
11233 HOST_WIDE_INT i;
11235 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11236 it exceeds SIZE. If only one probe is needed, this will not
11237 generate any code. Then probe at FIRST + SIZE. */
11238 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11239 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11240 -(first + i)));
11242 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11243 -(first + size)));
11246 /* Otherwise, do the same as above, but in a loop. Note that we must be
11247 extra careful with variables wrapping around because we might be at
11248 the very top (or the very bottom) of the address space and we have
11249 to be able to handle this case properly; in particular, we use an
11250 equality test for the loop condition. */
11251 else
11253 HOST_WIDE_INT rounded_size, last;
11254 struct scratch_reg sr;
11256 get_scratch_register_on_entry (&sr);
11259 /* Step 1: round SIZE to the previous multiple of the interval. */
11261 rounded_size = size & -PROBE_INTERVAL;
11264 /* Step 2: compute initial and final value of the loop counter. */
11266 /* TEST_OFFSET = FIRST. */
11267 emit_move_insn (sr.reg, GEN_INT (-first));
11269 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11270 last = first + rounded_size;
11273 /* Step 3: the loop
11275 while (TEST_ADDR != LAST_ADDR)
11277 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11278 probe at TEST_ADDR
11281 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11282 until it is equal to ROUNDED_SIZE. */
11284 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11287 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11288 that SIZE is equal to ROUNDED_SIZE. */
11290 if (size != rounded_size)
11291 emit_stack_probe (plus_constant (Pmode,
11292 gen_rtx_PLUS (Pmode,
11293 stack_pointer_rtx,
11294 sr.reg),
11295 rounded_size - size));
11297 release_scratch_register_on_entry (&sr);
11300 /* Make sure nothing is scheduled before we are done. */
11301 emit_insn (gen_blockage ());
11304 /* Probe a range of stack addresses from REG to END, inclusive. These are
11305 offsets from the current stack pointer. */
11307 const char *
11308 output_probe_stack_range (rtx reg, rtx end)
11310 static int labelno = 0;
11311 char loop_lab[32], end_lab[32];
11312 rtx xops[3];
11314 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11315 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11317 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11319 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11320 xops[0] = reg;
11321 xops[1] = end;
11322 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11323 fputs ("\tje\t", asm_out_file);
11324 assemble_name_raw (asm_out_file, end_lab);
11325 fputc ('\n', asm_out_file);
11327 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11328 xops[1] = GEN_INT (PROBE_INTERVAL);
11329 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11331 /* Probe at TEST_ADDR. */
11332 xops[0] = stack_pointer_rtx;
11333 xops[1] = reg;
11334 xops[2] = const0_rtx;
11335 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11337 fprintf (asm_out_file, "\tjmp\t");
11338 assemble_name_raw (asm_out_file, loop_lab);
11339 fputc ('\n', asm_out_file);
11341 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11343 return "";
11346 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11347 to be generated in the correct form. */
11348 static void
11349 ix86_finalize_stack_realign_flags (void)
11351 /* Check if stack realignment is really needed after reload, and
11352 store the result in cfun. */
11353 unsigned int incoming_stack_boundary
11354 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11355 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11356 unsigned int stack_realign = (incoming_stack_boundary
11357 < (crtl->is_leaf
11358 ? crtl->max_used_stack_slot_alignment
11359 : crtl->stack_alignment_needed));
11361 if (crtl->stack_realign_finalized)
11363 /* After stack_realign_needed is finalized, we can no longer
11364 change it. */
11365 gcc_assert (crtl->stack_realign_needed == stack_realign);
11366 return;
11369 /* If the only reason for frame_pointer_needed is that we conservatively
11370 assumed stack realignment might be needed, but in the end nothing that
11371 needed the stack alignment had been spilled, clear frame_pointer_needed
11372 and say we don't need stack realignment. */
11373 if (stack_realign
11374 && frame_pointer_needed
11375 && crtl->is_leaf
11376 && flag_omit_frame_pointer
11377 && crtl->sp_is_unchanging
11378 && !ix86_current_function_calls_tls_descriptor
11379 && !crtl->accesses_prior_frames
11380 && !cfun->calls_alloca
11381 && !crtl->calls_eh_return
11382 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11383 && !ix86_frame_pointer_required ()
11384 && get_frame_size () == 0
11385 && ix86_nsaved_sseregs () == 0
11386 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11388 HARD_REG_SET set_up_by_prologue, prologue_used;
11389 basic_block bb;
11391 CLEAR_HARD_REG_SET (prologue_used);
11392 CLEAR_HARD_REG_SET (set_up_by_prologue);
11393 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11394 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11395 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11396 HARD_FRAME_POINTER_REGNUM);
11397 FOR_EACH_BB_FN (bb, cfun)
11399 rtx_insn *insn;
11400 FOR_BB_INSNS (bb, insn)
11401 if (NONDEBUG_INSN_P (insn)
11402 && requires_stack_frame_p (insn, prologue_used,
11403 set_up_by_prologue))
11405 crtl->stack_realign_needed = stack_realign;
11406 crtl->stack_realign_finalized = true;
11407 return;
11411 /* If drap has been set, but it actually isn't live at the start
11412 of the function, there is no reason to set it up. */
11413 if (crtl->drap_reg)
11415 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11416 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11418 crtl->drap_reg = NULL_RTX;
11419 crtl->need_drap = false;
11422 else
11423 cfun->machine->no_drap_save_restore = true;
11425 frame_pointer_needed = false;
11426 stack_realign = false;
11427 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11428 crtl->stack_alignment_needed = incoming_stack_boundary;
11429 crtl->stack_alignment_estimated = incoming_stack_boundary;
11430 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11431 crtl->preferred_stack_boundary = incoming_stack_boundary;
11432 df_finish_pass (true);
11433 df_scan_alloc (NULL);
11434 df_scan_blocks ();
11435 df_compute_regs_ever_live (true);
11436 df_analyze ();
11439 crtl->stack_realign_needed = stack_realign;
11440 crtl->stack_realign_finalized = true;
11443 /* Delete SET_GOT right after the entry block if it is allocated to REG. */
11445 static void
11446 ix86_elim_entry_set_got (rtx reg)
11448 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11449 rtx_insn *c_insn = BB_HEAD (bb);
11450 if (!NONDEBUG_INSN_P (c_insn))
11451 c_insn = next_nonnote_nondebug_insn (c_insn);
11452 if (c_insn && NONJUMP_INSN_P (c_insn))
11454 rtx pat = PATTERN (c_insn);
11455 if (GET_CODE (pat) == PARALLEL)
11457 rtx vec = XVECEXP (pat, 0, 0);
11458 if (GET_CODE (vec) == SET
11459 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11460 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11461 delete_insn (c_insn);
11466 /* Expand the prologue into a bunch of separate insns. */
11468 void
11469 ix86_expand_prologue (void)
11471 struct machine_function *m = cfun->machine;
11472 rtx insn, t;
11473 struct ix86_frame frame;
11474 HOST_WIDE_INT allocate;
11475 bool int_registers_saved;
11476 bool sse_registers_saved;
11477 rtx static_chain = NULL_RTX;
11479 ix86_finalize_stack_realign_flags ();
11481 /* DRAP should not coexist with stack_realign_fp */
11482 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11484 memset (&m->fs, 0, sizeof (m->fs));
11486 /* Initialize CFA state for before the prologue. */
11487 m->fs.cfa_reg = stack_pointer_rtx;
11488 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11490 /* Track SP offset to the CFA. We continue tracking this after we've
11491 swapped the CFA register away from SP. In the case of re-alignment
11492 this is fudged; we're interested in offsets within the local frame. */
11493 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11494 m->fs.sp_valid = true;
11496 ix86_compute_frame_layout (&frame);
11498 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11500 /* We should have already generated an error for any use of
11501 ms_hook on a nested function. */
11502 gcc_checking_assert (!ix86_static_chain_on_stack);
11504 /* Check if profiling is active and we shall use the
11505 profiling-before-prologue variant. If so, sorry. */
11506 if (crtl->profile && flag_fentry != 0)
11507 sorry ("ms_hook_prologue attribute isn%'t compatible "
11508 "with -mfentry for 32-bit");
11510 /* In ix86_asm_output_function_label we emitted:
11511 8b ff movl.s %edi,%edi
11512 55 push %ebp
11513 8b ec movl.s %esp,%ebp
11515 This matches the hookable function prologue in Win32 API
11516 functions in Microsoft Windows XP Service Pack 2 and newer.
11517 Wine uses this to enable Windows apps to hook the Win32 API
11518 functions provided by Wine.
11520 What that means is that we've already set up the frame pointer. */
11522 if (frame_pointer_needed
11523 && !(crtl->drap_reg && crtl->stack_realign_needed))
11525 rtx push, mov;
11527 /* We've decided to use the frame pointer already set up.
11528 Describe this to the unwinder by pretending that both
11529 push and mov insns happen right here.
11531 Putting the unwind info here at the end of the ms_hook
11532 is done so that we can make absolutely certain we get
11533 the required byte sequence at the start of the function,
11534 rather than relying on an assembler that can produce
11535 the exact encoding required.
11537 However it does mean (in the unpatched case) that we have
11538 a 1 insn window where the asynchronous unwind info is
11539 incorrect. However, if we placed the unwind info at
11540 its correct location we would have incorrect unwind info
11541 in the patched case. Which is probably all moot since
11542 I don't expect Wine generates dwarf2 unwind info for the
11543 system libraries that use this feature. */
11545 insn = emit_insn (gen_blockage ());
11547 push = gen_push (hard_frame_pointer_rtx);
11548 mov = gen_rtx_SET (hard_frame_pointer_rtx,
11549 stack_pointer_rtx);
11550 RTX_FRAME_RELATED_P (push) = 1;
11551 RTX_FRAME_RELATED_P (mov) = 1;
11553 RTX_FRAME_RELATED_P (insn) = 1;
11554 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11555 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11557 /* Note that gen_push incremented m->fs.cfa_offset, even
11558 though we didn't emit the push insn here. */
11559 m->fs.cfa_reg = hard_frame_pointer_rtx;
11560 m->fs.fp_offset = m->fs.cfa_offset;
11561 m->fs.fp_valid = true;
11563 else
11565 /* The frame pointer is not needed so pop %ebp again.
11566 This leaves us with a pristine state. */
11567 emit_insn (gen_pop (hard_frame_pointer_rtx));
11571 /* The first insn of a function that accepts its static chain on the
11572 stack is to push the register that would be filled in by a direct
11573 call. This insn will be skipped by the trampoline. */
11574 else if (ix86_static_chain_on_stack)
11576 static_chain = ix86_static_chain (cfun->decl, false);
11577 insn = emit_insn (gen_push (static_chain));
11578 emit_insn (gen_blockage ());
11580 /* We don't want to interpret this push insn as a register save,
11581 only as a stack adjustment. The real copy of the register as
11582 a save will be done later, if needed. */
11583 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11584 t = gen_rtx_SET (stack_pointer_rtx, t);
11585 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11586 RTX_FRAME_RELATED_P (insn) = 1;
11589 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11590 DRAP is needed and stack realignment is really needed after reload. */
11591 if (stack_realign_drap)
11593 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11595 /* Only need to push the parameter pointer reg if it is caller-saved. */
11596 if (!call_used_regs[REGNO (crtl->drap_reg)])
11598 /* Push arg pointer reg */
11599 insn = emit_insn (gen_push (crtl->drap_reg));
11600 RTX_FRAME_RELATED_P (insn) = 1;
11603 /* Grab the argument pointer. */
11604 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11605 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11606 RTX_FRAME_RELATED_P (insn) = 1;
11607 m->fs.cfa_reg = crtl->drap_reg;
11608 m->fs.cfa_offset = 0;
11610 /* Align the stack. */
11611 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11612 stack_pointer_rtx,
11613 GEN_INT (-align_bytes)));
11614 RTX_FRAME_RELATED_P (insn) = 1;
11616 /* Replicate the return address on the stack so that the return
11617 address can be reached via the (argp - 1) slot. This is needed
11618 to implement the RETURN_ADDR_RTX macro and the intrinsic function
11619 expand_builtin_return_addr, etc. */
11620 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11621 t = gen_frame_mem (word_mode, t);
11622 insn = emit_insn (gen_push (t));
11623 RTX_FRAME_RELATED_P (insn) = 1;
11625 /* For the purposes of frame and register save area addressing,
11626 we've started over with a new frame. */
11627 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11628 m->fs.realigned = true;
11630 if (static_chain)
11632 /* Replicate the static chain on the stack so that it
11633 can be reached via the (argp - 2) slot. This is needed for
11634 nested functions with stack realignment. */
11635 insn = emit_insn (gen_push (static_chain));
11636 RTX_FRAME_RELATED_P (insn) = 1;
11640 int_registers_saved = (frame.nregs == 0);
11641 sse_registers_saved = (frame.nsseregs == 0);
11643 if (frame_pointer_needed && !m->fs.fp_valid)
11645 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11646 slower on all targets. Also sdb doesn't like it. */
11647 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11648 RTX_FRAME_RELATED_P (insn) = 1;
11650 /* Push registers now, before setting the frame pointer
11651 on SEH target. */
11652 if (!int_registers_saved
11653 && TARGET_SEH
11654 && !frame.save_regs_using_mov)
11656 ix86_emit_save_regs ();
11657 int_registers_saved = true;
11658 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11661 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11663 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11664 RTX_FRAME_RELATED_P (insn) = 1;
11666 if (m->fs.cfa_reg == stack_pointer_rtx)
11667 m->fs.cfa_reg = hard_frame_pointer_rtx;
11668 m->fs.fp_offset = m->fs.sp_offset;
11669 m->fs.fp_valid = true;
11673 if (!int_registers_saved)
11675 /* If saving registers via PUSH, do so now. */
11676 if (!frame.save_regs_using_mov)
11678 ix86_emit_save_regs ();
11679 int_registers_saved = true;
11680 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11683 /* When using the red zone we may start register saving before allocating
11684 the stack frame, saving one cycle of the prologue. However, avoid
11685 doing this if we have to probe the stack; at least on x86_64 the
11686 stack probe can turn into a call that clobbers a red zone location. */
11687 else if (ix86_using_red_zone ()
11688 && (! TARGET_STACK_PROBE
11689 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11691 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11692 int_registers_saved = true;
11696 if (stack_realign_fp)
11698 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11699 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11701 /* The computation of the size of the re-aligned stack frame means
11702 that we must allocate the size of the register save area before
11703 performing the actual alignment. Otherwise we cannot guarantee
11704 that there's enough storage above the realignment point. */
11705 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11706 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11707 GEN_INT (m->fs.sp_offset
11708 - frame.sse_reg_save_offset),
11709 -1, false);
11711 /* Align the stack. */
11712 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11713 stack_pointer_rtx,
11714 GEN_INT (-align_bytes)));
11716 /* For the purposes of register save area addressing, the stack
11717 pointer is no longer valid. As for the value of sp_offset,
11718 see ix86_compute_frame_layout, which we need to match in order
11719 to pass verification of stack_pointer_offset at the end. */
11720 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11721 m->fs.sp_valid = false;
11724 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11726 if (flag_stack_usage_info)
11728 /* We start to count from ARG_POINTER. */
11729 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11731 /* If it was realigned, take into account the fake frame. */
11732 if (stack_realign_drap)
11734 if (ix86_static_chain_on_stack)
11735 stack_size += UNITS_PER_WORD;
11737 if (!call_used_regs[REGNO (crtl->drap_reg)])
11738 stack_size += UNITS_PER_WORD;
11740 /* This over-estimates by 1 minimal-stack-alignment-unit but
11741 mitigates that by counting in the new return address slot. */
11742 current_function_dynamic_stack_size
11743 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11746 current_function_static_stack_size = stack_size;
11749 /* On SEH target with very large frame size, allocate an area to save
11750 SSE registers (as the very large allocation won't be described). */
11751 if (TARGET_SEH
11752 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11753 && !sse_registers_saved)
11755 HOST_WIDE_INT sse_size =
11756 frame.sse_reg_save_offset - frame.reg_save_offset;
11758 gcc_assert (int_registers_saved);
11760 /* No need to do stack checking as the area will be immediately
11761 written. */
11762 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11763 GEN_INT (-sse_size), -1,
11764 m->fs.cfa_reg == stack_pointer_rtx);
11765 allocate -= sse_size;
11766 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11767 sse_registers_saved = true;
11770 /* The stack has already been decremented by the instruction calling us
11771 so probe if the size is non-negative to preserve the protection area. */
11772 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11774 /* We expect the registers to be saved when probes are used. */
11775 gcc_assert (int_registers_saved);
11777 if (STACK_CHECK_MOVING_SP)
11779 if (!(crtl->is_leaf && !cfun->calls_alloca
11780 && allocate <= PROBE_INTERVAL))
11782 ix86_adjust_stack_and_probe (allocate);
11783 allocate = 0;
11786 else
11788 HOST_WIDE_INT size = allocate;
11790 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11791 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11793 if (TARGET_STACK_PROBE)
11795 if (crtl->is_leaf && !cfun->calls_alloca)
11797 if (size > PROBE_INTERVAL)
11798 ix86_emit_probe_stack_range (0, size);
11800 else
11801 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11803 else
11805 if (crtl->is_leaf && !cfun->calls_alloca)
11807 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11808 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11809 size - STACK_CHECK_PROTECT);
11811 else
11812 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11817 if (allocate == 0)
11819 else if (!ix86_target_stack_probe ()
11820 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11822 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11823 GEN_INT (-allocate), -1,
11824 m->fs.cfa_reg == stack_pointer_rtx);
11826 else
11828 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11829 rtx r10 = NULL;
11830 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11831 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11832 bool eax_live = ix86_eax_live_at_start_p ();
11833 bool r10_live = false;
11835 if (TARGET_64BIT)
11836 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11838 if (eax_live)
11840 insn = emit_insn (gen_push (eax));
11841 allocate -= UNITS_PER_WORD;
11842 /* Note that SEH directives need to continue tracking the stack
11843 pointer even after the frame pointer has been set up. */
11844 if (sp_is_cfa_reg || TARGET_SEH)
11846 if (sp_is_cfa_reg)
11847 m->fs.cfa_offset += UNITS_PER_WORD;
11848 RTX_FRAME_RELATED_P (insn) = 1;
11849 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11850 gen_rtx_SET (stack_pointer_rtx,
11851 plus_constant (Pmode, stack_pointer_rtx,
11852 -UNITS_PER_WORD)));
11856 if (r10_live)
11858 r10 = gen_rtx_REG (Pmode, R10_REG);
11859 insn = emit_insn (gen_push (r10));
11860 allocate -= UNITS_PER_WORD;
11861 if (sp_is_cfa_reg || TARGET_SEH)
11863 if (sp_is_cfa_reg)
11864 m->fs.cfa_offset += UNITS_PER_WORD;
11865 RTX_FRAME_RELATED_P (insn) = 1;
11866 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11867 gen_rtx_SET (stack_pointer_rtx,
11868 plus_constant (Pmode, stack_pointer_rtx,
11869 -UNITS_PER_WORD)));
11873 emit_move_insn (eax, GEN_INT (allocate));
11874 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11876 /* Use the fact that AX still contains ALLOCATE. */
11877 adjust_stack_insn = (Pmode == DImode
11878 ? gen_pro_epilogue_adjust_stack_di_sub
11879 : gen_pro_epilogue_adjust_stack_si_sub);
11881 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11882 stack_pointer_rtx, eax));
11884 if (sp_is_cfa_reg || TARGET_SEH)
11886 if (sp_is_cfa_reg)
11887 m->fs.cfa_offset += allocate;
11888 RTX_FRAME_RELATED_P (insn) = 1;
11889 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11890 gen_rtx_SET (stack_pointer_rtx,
11891 plus_constant (Pmode, stack_pointer_rtx,
11892 -allocate)));
11894 m->fs.sp_offset += allocate;
11896 /* Use stack_pointer_rtx for relative addressing so that code
11897 works for realigned stack, too. */
11898 if (r10_live && eax_live)
11900 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11901 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11902 gen_frame_mem (word_mode, t));
11903 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11904 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11905 gen_frame_mem (word_mode, t));
11907 else if (eax_live || r10_live)
11909 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11910 emit_move_insn (gen_rtx_REG (word_mode,
11911 (eax_live ? AX_REG : R10_REG)),
11912 gen_frame_mem (word_mode, t));
11915 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11917 /* If we haven't already set up the frame pointer, do so now. */
11918 if (frame_pointer_needed && !m->fs.fp_valid)
11920 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11921 GEN_INT (frame.stack_pointer_offset
11922 - frame.hard_frame_pointer_offset));
11923 insn = emit_insn (insn);
11924 RTX_FRAME_RELATED_P (insn) = 1;
11925 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11927 if (m->fs.cfa_reg == stack_pointer_rtx)
11928 m->fs.cfa_reg = hard_frame_pointer_rtx;
11929 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11930 m->fs.fp_valid = true;
11933 if (!int_registers_saved)
11934 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11935 if (!sse_registers_saved)
11936 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11938 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11939 in the prologue. */
11940 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11942 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11943 insn = emit_insn (gen_set_got (pic));
11944 RTX_FRAME_RELATED_P (insn) = 1;
11945 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11946 emit_insn (gen_prologue_use (pic));
11947 /* Delete an already emitted SET_GOT if it exists and is allocated to
11948 REAL_PIC_OFFSET_TABLE_REGNUM. */
11949 ix86_elim_entry_set_got (pic);
11952 if (crtl->drap_reg && !crtl->stack_realign_needed)
11954 /* vDRAP is set up, but after reload it turns out stack realignment
11955 isn't necessary; here we emit prologue code to set up DRAP
11956 without the stack realignment adjustment. */
11957 t = choose_baseaddr (0);
11958 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11961 /* Prevent instructions from being scheduled into register save push
11962 sequence when access to the redzone area is done through frame pointer.
11963 The offset between the frame pointer and the stack pointer is calculated
11964 relative to the value of the stack pointer at the end of the function
11965 prologue, and moving instructions that access redzone area via frame
11966 pointer inside push sequence violates this assumption. */
11967 if (frame_pointer_needed && frame.red_zone_size)
11968 emit_insn (gen_memory_blockage ());
11970 /* Emit cld instruction if stringops are used in the function. */
11971 if (TARGET_CLD && ix86_current_function_needs_cld)
11972 emit_insn (gen_cld ());
11974 /* SEH requires that the prologue end within 256 bytes of the start of
11975 the function. Prevent instruction schedules that would extend that.
11976 Further, prevent alloca modifications to the stack pointer from being
11977 combined with prologue modifications. */
11978 if (TARGET_SEH)
11979 emit_insn (gen_prologue_use (stack_pointer_rtx));
11982 /* Emit code to restore REG using a POP insn. */
11984 static void
11985 ix86_emit_restore_reg_using_pop (rtx reg)
11987 struct machine_function *m = cfun->machine;
11988 rtx_insn *insn = emit_insn (gen_pop (reg));
11990 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11991 m->fs.sp_offset -= UNITS_PER_WORD;
11993 if (m->fs.cfa_reg == crtl->drap_reg
11994 && REGNO (reg) == REGNO (crtl->drap_reg))
11996 /* Previously we'd represented the CFA as an expression
11997 like *(%ebp - 8). We've just popped that value from
11998 the stack, which means we need to reset the CFA to
11999 the drap register. This will remain until we restore
12000 the stack pointer. */
12001 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
12002 RTX_FRAME_RELATED_P (insn) = 1;
12004 /* This means that the DRAP register is valid for addressing too. */
12005 m->fs.drap_valid = true;
12006 return;
12009 if (m->fs.cfa_reg == stack_pointer_rtx)
12011 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12012 x = gen_rtx_SET (stack_pointer_rtx, x);
12013 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12014 RTX_FRAME_RELATED_P (insn) = 1;
12016 m->fs.cfa_offset -= UNITS_PER_WORD;
12019 /* When the frame pointer is the CFA, and we pop it, we are
12020 swapping back to the stack pointer as the CFA. This happens
12021 for stack frames that don't allocate other data, so we assume
12022 the stack pointer is now pointing at the return address, i.e.
12023 the function entry state, which makes the offset be 1 word. */
12024 if (reg == hard_frame_pointer_rtx)
12026 m->fs.fp_valid = false;
12027 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
12029 m->fs.cfa_reg = stack_pointer_rtx;
12030 m->fs.cfa_offset -= UNITS_PER_WORD;
12032 add_reg_note (insn, REG_CFA_DEF_CFA,
12033 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12034 GEN_INT (m->fs.cfa_offset)));
12035 RTX_FRAME_RELATED_P (insn) = 1;
12040 /* Emit code to restore saved registers using POP insns. */
12042 static void
12043 ix86_emit_restore_regs_using_pop (void)
12045 unsigned int regno;
12047 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12048 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
12049 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
12052 /* Emit code and notes for the LEAVE instruction. */
12054 static void
12055 ix86_emit_leave (void)
12057 struct machine_function *m = cfun->machine;
12058 rtx_insn *insn = emit_insn (ix86_gen_leave ());
12060 ix86_add_queued_cfa_restore_notes (insn);
12062 gcc_assert (m->fs.fp_valid);
12063 m->fs.sp_valid = true;
12064 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
12065 m->fs.fp_valid = false;
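/* leave is equivalent to "movq %rbp, %rsp; popq %rbp" (in 64-bit mode), which
   is why the stack pointer becomes valid again one word above the old frame
   pointer location while the frame pointer itself ceases to be valid.  */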
12067 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
12069 m->fs.cfa_reg = stack_pointer_rtx;
12070 m->fs.cfa_offset = m->fs.sp_offset;
12072 add_reg_note (insn, REG_CFA_DEF_CFA,
12073 plus_constant (Pmode, stack_pointer_rtx,
12074 m->fs.sp_offset));
12075 RTX_FRAME_RELATED_P (insn) = 1;
12077 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
12078 m->fs.fp_offset);
12081 /* Emit code to restore saved registers using MOV insns.
12082 First register is restored from CFA - CFA_OFFSET. */
12083 static void
12084 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
12085 bool maybe_eh_return)
12087 struct machine_function *m = cfun->machine;
12088 unsigned int regno;
12090 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12091 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12093 rtx reg = gen_rtx_REG (word_mode, regno);
12094 rtx mem;
12095 rtx_insn *insn;
12097 mem = choose_baseaddr (cfa_offset);
12098 mem = gen_frame_mem (word_mode, mem);
12099 insn = emit_move_insn (reg, mem);
12101 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
12103 /* Previously we'd represented the CFA as an expression
12104 like *(%ebp - 8). We've just popped that value from
12105 the stack, which means we need to reset the CFA to
12106 the drap register. This will remain until we restore
12107 the stack pointer. */
12108 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
12109 RTX_FRAME_RELATED_P (insn) = 1;
12111 /* This means that the DRAP register is valid for addressing. */
12112 m->fs.drap_valid = true;
12114 else
12115 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12117 cfa_offset -= UNITS_PER_WORD;
12121 /* Emit code to restore saved SSE registers using MOV insns.
12122 First register is restored from CFA - CFA_OFFSET. */
12123 static void
12124 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
12125 bool maybe_eh_return)
12127 unsigned int regno;
12129 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12130 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12132 rtx reg = gen_rtx_REG (V4SFmode, regno);
12133 rtx mem;
12135 mem = choose_baseaddr (cfa_offset);
12136 mem = gen_rtx_MEM (V4SFmode, mem);
12137 set_mem_align (mem, 128);
12138 emit_move_insn (reg, mem);
12140 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12142 cfa_offset -= 16;
12146 /* Restore function stack, frame, and registers. */
12148 void
12149 ix86_expand_epilogue (int style)
12151 struct machine_function *m = cfun->machine;
12152 struct machine_frame_state frame_state_save = m->fs;
12153 struct ix86_frame frame;
12154 bool restore_regs_via_mov;
12155 bool using_drap;
12157 ix86_finalize_stack_realign_flags ();
12158 ix86_compute_frame_layout (&frame);
12160 m->fs.sp_valid = (!frame_pointer_needed
12161 || (crtl->sp_is_unchanging
12162 && !stack_realign_fp));
12163 gcc_assert (!m->fs.sp_valid
12164 || m->fs.sp_offset == frame.stack_pointer_offset);
12166 /* The FP must be valid if the frame pointer is present. */
12167 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12168 gcc_assert (!m->fs.fp_valid
12169 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12171 /* We must have *some* valid pointer to the stack frame. */
12172 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12174 /* The DRAP is never valid at this point. */
12175 gcc_assert (!m->fs.drap_valid);
12177 /* See the comment about red zone and frame
12178 pointer usage in ix86_expand_prologue. */
12179 if (frame_pointer_needed && frame.red_zone_size)
12180 emit_insn (gen_memory_blockage ());
12182 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12183 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12185 /* Determine the CFA offset of the end of the red-zone. */
12186 m->fs.red_zone_offset = 0;
12187 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12189 /* The red-zone begins below the return address. */
12190 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12192 /* When the register save area is in the aligned portion of
12193 the stack, determine the maximum runtime displacement that
12194 matches up with the aligned frame. */
12195 if (stack_realign_drap)
12196 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12197 + UNITS_PER_WORD);
12200 /* Special care must be taken for the normal return case of a function
12201 using eh_return: the eax and edx registers are marked as saved, but
12202 not restored along this path. Adjust the save location to match. */
12203 if (crtl->calls_eh_return && style != 2)
12204 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12206 /* EH_RETURN requires the use of moves to function properly. */
12207 if (crtl->calls_eh_return)
12208 restore_regs_via_mov = true;
12209 /* SEH requires the use of pops to identify the epilogue. */
12210 else if (TARGET_SEH)
12211 restore_regs_via_mov = false;
12212 /* If we're only restoring one register and sp is not valid, then
12213 use a move instruction to restore the register, since it's
12214 less work than reloading sp and popping the register. */
12215 else if (!m->fs.sp_valid && frame.nregs <= 1)
12216 restore_regs_via_mov = true;
12217 else if (TARGET_EPILOGUE_USING_MOVE
12218 && cfun->machine->use_fast_prologue_epilogue
12219 && (frame.nregs > 1
12220 || m->fs.sp_offset != frame.reg_save_offset))
12221 restore_regs_via_mov = true;
12222 else if (frame_pointer_needed
12223 && !frame.nregs
12224 && m->fs.sp_offset != frame.reg_save_offset)
12225 restore_regs_via_mov = true;
12226 else if (frame_pointer_needed
12227 && TARGET_USE_LEAVE
12228 && cfun->machine->use_fast_prologue_epilogue
12229 && frame.nregs == 1)
12230 restore_regs_via_mov = true;
12231 else
12232 restore_regs_via_mov = false;
12234 if (restore_regs_via_mov || frame.nsseregs)
12236 /* Ensure that the entire register save area is addressable via
12237 the stack pointer, if we will restore via sp. */
12238 if (TARGET_64BIT
12239 && m->fs.sp_offset > 0x7fffffff
12240 && !(m->fs.fp_valid || m->fs.drap_valid)
12241 && (frame.nsseregs + frame.nregs) != 0)
12243 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12244 GEN_INT (m->fs.sp_offset
12245 - frame.sse_reg_save_offset),
12246 style,
12247 m->fs.cfa_reg == stack_pointer_rtx);
12251 /* If there are any SSE registers to restore, then we have to do it
12252 via moves, since there's obviously no pop for SSE regs. */
12253 if (frame.nsseregs)
12254 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12255 style == 2);
12257 if (restore_regs_via_mov)
12259 rtx t;
12261 if (frame.nregs)
12262 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12264 /* eh_return epilogues need %ecx added to the stack pointer. */
12265 if (style == 2)
12267 rtx sa = EH_RETURN_STACKADJ_RTX;
12268 rtx_insn *insn;
12270 /* Stack align doesn't work with eh_return. */
12271 gcc_assert (!stack_realign_drap);
12272 /* Neither do regparm nested functions. */
12273 gcc_assert (!ix86_static_chain_on_stack);
12275 if (frame_pointer_needed)
12277 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12278 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12279 emit_insn (gen_rtx_SET (sa, t));
12281 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12282 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12284 /* Note that we use SA as a temporary CFA, as the return
12285 address is at the proper place relative to it. We
12286 pretend this happens at the FP restore insn because
12287 prior to this insn the FP would be stored at the wrong
12288 offset relative to SA, and after this insn we have no
12289 other reasonable register to use for the CFA. We don't
12290 bother resetting the CFA to the SP for the duration of
12291 the return insn. */
12292 add_reg_note (insn, REG_CFA_DEF_CFA,
12293 plus_constant (Pmode, sa, UNITS_PER_WORD));
12294 ix86_add_queued_cfa_restore_notes (insn);
12295 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12296 RTX_FRAME_RELATED_P (insn) = 1;
12298 m->fs.cfa_reg = sa;
12299 m->fs.cfa_offset = UNITS_PER_WORD;
12300 m->fs.fp_valid = false;
12302 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12303 const0_rtx, style, false);
12305 else
12307 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12308 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12309 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
12310 ix86_add_queued_cfa_restore_notes (insn);
12312 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12313 if (m->fs.cfa_offset != UNITS_PER_WORD)
12315 m->fs.cfa_offset = UNITS_PER_WORD;
12316 add_reg_note (insn, REG_CFA_DEF_CFA,
12317 plus_constant (Pmode, stack_pointer_rtx,
12318 UNITS_PER_WORD));
12319 RTX_FRAME_RELATED_P (insn) = 1;
12322 m->fs.sp_offset = UNITS_PER_WORD;
12323 m->fs.sp_valid = true;
12326 else
12328 /* SEH requires that the function end with (1) a stack adjustment
12329 if necessary, (2) a sequence of pops, and (3) a return or
12330 jump instruction. Prevent insns from the function body from
12331 being scheduled into this sequence. */
12332 if (TARGET_SEH)
12334 /* Prevent a catch region from being adjacent to the standard
12335 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12336 several other flags that would be interesting to test are
12337 set up yet. */
12338 if (flag_non_call_exceptions)
12339 emit_insn (gen_nops (const1_rtx));
12340 else
12341 emit_insn (gen_blockage ());
12344 /* The first step is to deallocate the stack frame so that we can
12345 pop the registers. Also do it on SEH targets for very large
12346 frames, as the emitted instructions aren't allowed by the ABI in
12347 epilogues. */
12348 if (!m->fs.sp_valid
12349 || (TARGET_SEH
12350 && (m->fs.sp_offset - frame.reg_save_offset
12351 >= SEH_MAX_FRAME_SIZE)))
12353 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12354 GEN_INT (m->fs.fp_offset
12355 - frame.reg_save_offset),
12356 style, false);
12358 else if (m->fs.sp_offset != frame.reg_save_offset)
12360 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12361 GEN_INT (m->fs.sp_offset
12362 - frame.reg_save_offset),
12363 style,
12364 m->fs.cfa_reg == stack_pointer_rtx);
12367 ix86_emit_restore_regs_using_pop ();
12370 /* If we used a frame pointer and haven't already got rid of it,
12371 then do so now. */
12372 if (m->fs.fp_valid)
12374 /* If the stack pointer is valid and pointing at the frame
12375 pointer store address, then we only need a pop. */
12376 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12377 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12378 /* Using "leave" results in shorter dependency chains on CPUs that
12379 are able to grok it fast. */
12380 else if (TARGET_USE_LEAVE
12381 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12382 || !cfun->machine->use_fast_prologue_epilogue)
12383 ix86_emit_leave ();
12384 else
12386 pro_epilogue_adjust_stack (stack_pointer_rtx,
12387 hard_frame_pointer_rtx,
12388 const0_rtx, style, !using_drap);
12389 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12393 if (using_drap)
12395 int param_ptr_offset = UNITS_PER_WORD;
12396 rtx_insn *insn;
12398 gcc_assert (stack_realign_drap);
12400 if (ix86_static_chain_on_stack)
12401 param_ptr_offset += UNITS_PER_WORD;
12402 if (!call_used_regs[REGNO (crtl->drap_reg)])
12403 param_ptr_offset += UNITS_PER_WORD;
12405 insn = emit_insn (gen_rtx_SET
12406 (stack_pointer_rtx,
12407 gen_rtx_PLUS (Pmode,
12408 crtl->drap_reg,
12409 GEN_INT (-param_ptr_offset))));
12410 m->fs.cfa_reg = stack_pointer_rtx;
12411 m->fs.cfa_offset = param_ptr_offset;
12412 m->fs.sp_offset = param_ptr_offset;
12413 m->fs.realigned = false;
12415 add_reg_note (insn, REG_CFA_DEF_CFA,
12416 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12417 GEN_INT (param_ptr_offset)));
12418 RTX_FRAME_RELATED_P (insn) = 1;
12420 if (!call_used_regs[REGNO (crtl->drap_reg)])
12421 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12424 /* At this point the stack pointer must be valid, and we must have
12425 restored all of the registers. We may not have deallocated the
12426 entire stack frame. We've delayed this until now because it may
12427 be possible to merge the local stack deallocation with the
12428 deallocation forced by ix86_static_chain_on_stack. */
12429 gcc_assert (m->fs.sp_valid);
12430 gcc_assert (!m->fs.fp_valid);
12431 gcc_assert (!m->fs.realigned);
12432 if (m->fs.sp_offset != UNITS_PER_WORD)
12434 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12435 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12436 style, true);
12438 else
12439 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12441 /* Sibcall epilogues don't want a return instruction. */
12442 if (style == 0)
12444 m->fs = frame_state_save;
12445 return;
12448 if (crtl->args.pops_args && crtl->args.size)
12450 rtx popc = GEN_INT (crtl->args.pops_args);
12452 /* i386 can only pop 64K bytes. If asked to pop more, pop return
12453 address, do explicit add, and jump indirectly to the caller. */
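/* A sketch of the fallback sequence built below (operand values are
   illustrative only, not taken from a real compilation):

       popl  %ecx              # pop the return address
       addl  $pops_args, %esp  # discard the stack arguments
       jmp   *%ecx             # return to the caller

   The plain "ret $imm16" form only carries a 16-bit pop count, hence
   the 65536 limit tested just below.  */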
12455 if (crtl->args.pops_args >= 65536)
12457 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12458 rtx_insn *insn;
12460 /* There is no "pascal" calling convention in any 64bit ABI. */
12461 gcc_assert (!TARGET_64BIT);
12463 insn = emit_insn (gen_pop (ecx));
12464 m->fs.cfa_offset -= UNITS_PER_WORD;
12465 m->fs.sp_offset -= UNITS_PER_WORD;
12467 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12468 x = gen_rtx_SET (stack_pointer_rtx, x);
12469 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12470 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
12471 RTX_FRAME_RELATED_P (insn) = 1;
12473 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12474 popc, -1, true);
12475 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12477 else
12478 emit_jump_insn (gen_simple_return_pop_internal (popc));
12480 else
12481 emit_jump_insn (gen_simple_return_internal ());
12483 /* Restore the state back to the state from the prologue,
12484 so that it's correct for the next epilogue. */
12485 m->fs = frame_state_save;
12488 /* Reset from the function's potential modifications. */
12490 static void
12491 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12493 if (pic_offset_table_rtx
12494 && !ix86_use_pseudo_pic_reg ())
12495 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12496 #if TARGET_MACHO
12497 /* Mach-O doesn't support labels at the end of objects, so if
12498 it looks like we might want one, insert a NOP. */
12500 rtx_insn *insn = get_last_insn ();
12501 rtx_insn *deleted_debug_label = NULL;
12502 while (insn
12503 && NOTE_P (insn)
12504 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12506 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12507 notes; instead, only set their CODE_LABEL_NUMBER to -1,
12508 otherwise there would be code generation differences
12509 between -g and -g0. */
12510 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12511 deleted_debug_label = insn;
12512 insn = PREV_INSN (insn);
12514 if (insn
12515 && (LABEL_P (insn)
12516 || (NOTE_P (insn)
12517 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12518 fputs ("\tnop\n", file);
12519 else if (deleted_debug_label)
12520 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12521 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12522 CODE_LABEL_NUMBER (insn) = -1;
12524 #endif
12528 /* Return a scratch register to use in the split stack prologue. The
12529 split stack prologue is used for -fsplit-stack. It comprises the first
12530 instructions in the function, emitted even before the regular prologue.
12531 The scratch register can be any caller-saved register which is not
12532 used for parameters or for the static chain. */
12534 static unsigned int
12535 split_stack_prologue_scratch_regno (void)
12537 if (TARGET_64BIT)
12538 return R11_REG;
12539 else
12541 bool is_fastcall, is_thiscall;
12542 int regparm;
12544 is_fastcall = (lookup_attribute ("fastcall",
12545 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12546 != NULL);
12547 is_thiscall = (lookup_attribute ("thiscall",
12548 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12549 != NULL);
12550 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12552 if (is_fastcall)
12554 if (DECL_STATIC_CHAIN (cfun->decl))
12556 sorry ("-fsplit-stack does not support fastcall with "
12557 "nested function");
12558 return INVALID_REGNUM;
12560 return AX_REG;
12562 else if (is_thiscall)
12564 if (!DECL_STATIC_CHAIN (cfun->decl))
12565 return DX_REG;
12566 return AX_REG;
12568 else if (regparm < 3)
12570 if (!DECL_STATIC_CHAIN (cfun->decl))
12571 return CX_REG;
12572 else
12574 if (regparm >= 2)
12576 sorry ("-fsplit-stack does not support 2 register "
12577 "parameters for a nested function");
12578 return INVALID_REGNUM;
12580 return DX_REG;
12583 else
12585 /* FIXME: We could make this work by pushing a register
12586 around the addition and comparison. */
12587 sorry ("-fsplit-stack does not support 3 register parameters");
12588 return INVALID_REGNUM;
12593 /* A SYMBOL_REF for the function which allocates new stack space for
12594 -fsplit-stack. */
12596 static GTY(()) rtx split_stack_fn;
12598 /* A SYMBOL_REF for the more-stack function when using the large
12599 model. */
12601 static GTY(()) rtx split_stack_fn_large;
12603 /* Handle -fsplit-stack. These are the first instructions in the
12604 function, even before the regular prologue. */
12606 void
12607 ix86_expand_split_stack_prologue (void)
12609 struct ix86_frame frame;
12610 HOST_WIDE_INT allocate;
12611 unsigned HOST_WIDE_INT args_size;
12612 rtx_code_label *label;
12613 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12614 rtx scratch_reg = NULL_RTX;
12615 rtx_code_label *varargs_label = NULL;
12616 rtx fn;
12618 gcc_assert (flag_split_stack && reload_completed);
12620 ix86_finalize_stack_realign_flags ();
12621 ix86_compute_frame_layout (&frame);
12622 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12624 /* This is the label we will branch to if we have enough stack
12625 space. We expect the basic block reordering pass to reverse this
12626 branch if optimizing, so that we branch in the unlikely case. */
12627 label = gen_label_rtx ();
12629 /* We need to compare the stack pointer minus the frame size with
12630 the stack boundary in the TCB. The stack boundary always gives
12631 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12632 can compare directly. Otherwise we need to do an addition. */
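/* Schematically, on x86-64 the check built below looks roughly like
   (the TCB slot is whatever UNSPEC_STACK_CHECK expands to; names and
   registers here are illustrative, not literal output):

       small frame:   cmpq  %fs:<guard>, %rsp
       large frame:   leaq  -<frame>(%rsp), %r11
                      cmpq  %fs:<guard>, %r11

   followed by a branch to LABEL when there is enough stack.  */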
12634 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12635 UNSPEC_STACK_CHECK);
12636 limit = gen_rtx_CONST (Pmode, limit);
12637 limit = gen_rtx_MEM (Pmode, limit);
12638 if (allocate < SPLIT_STACK_AVAILABLE)
12639 current = stack_pointer_rtx;
12640 else
12642 unsigned int scratch_regno;
12643 rtx offset;
12645 /* We need a scratch register to hold the stack pointer minus
12646 the required frame size. Since this is the very start of the
12647 function, the scratch register can be any caller-saved
12648 register which is not used for parameters. */
12649 offset = GEN_INT (- allocate);
12650 scratch_regno = split_stack_prologue_scratch_regno ();
12651 if (scratch_regno == INVALID_REGNUM)
12652 return;
12653 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12654 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12656 /* We don't use ix86_gen_add3 in this case because it will
12657 want to split to lea, but when not optimizing the insn
12658 will not be split after this point. */
12659 emit_insn (gen_rtx_SET (scratch_reg,
12660 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12661 offset)));
12663 else
12665 emit_move_insn (scratch_reg, offset);
12666 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12667 stack_pointer_rtx));
12669 current = scratch_reg;
12672 ix86_expand_branch (GEU, current, limit, label);
12673 jump_insn = get_last_insn ();
12674 JUMP_LABEL (jump_insn) = label;
12676 /* Mark the jump as very likely to be taken. */
12677 add_int_reg_note (jump_insn, REG_BR_PROB,
12678 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12680 if (split_stack_fn == NULL_RTX)
12682 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12683 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12685 fn = split_stack_fn;
12687 /* Get more stack space. We pass in the desired stack space and the
12688 size of the arguments to copy to the new stack. In 32-bit mode
12689 we push the parameters; __morestack will return on a new stack
12690 anyhow. In 64-bit mode we pass the parameters in r10 and
12691 r11. */
12692 allocate_rtx = GEN_INT (allocate);
12693 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12694 call_fusage = NULL_RTX;
12695 if (TARGET_64BIT)
12697 rtx reg10, reg11;
12699 reg10 = gen_rtx_REG (Pmode, R10_REG);
12700 reg11 = gen_rtx_REG (Pmode, R11_REG);
12702 /* If this function uses a static chain, it will be in %r10.
12703 Preserve it across the call to __morestack. */
12704 if (DECL_STATIC_CHAIN (cfun->decl))
12706 rtx rax;
12708 rax = gen_rtx_REG (word_mode, AX_REG);
12709 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12710 use_reg (&call_fusage, rax);
12713 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12714 && !TARGET_PECOFF)
12716 HOST_WIDE_INT argval;
12718 gcc_assert (Pmode == DImode);
12719 /* When using the large model we need to load the address
12720 into a register, and we've run out of registers. So we
12721 switch to a different calling convention, and we call a
12722 different function: __morestack_large_model. We pass the
12723 argument size in the upper 32 bits of r10 and pass the
12724 frame size in the lower 32 bits. */
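/* A worked instance (values assumed purely for illustration): with
   args_size == 0x20 and allocate == 0x1000, the combined value computed
   below is 0x0000002000001000, i.e. the argument size in bits [63:32]
   of %r10 and the frame size in bits [31:0].  */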
12725 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12726 gcc_assert ((args_size & 0xffffffff) == args_size);
12728 if (split_stack_fn_large == NULL_RTX)
12730 split_stack_fn_large =
12731 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12732 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12734 if (ix86_cmodel == CM_LARGE_PIC)
12736 rtx_code_label *label;
12737 rtx x;
12739 label = gen_label_rtx ();
12740 emit_label (label);
12741 LABEL_PRESERVE_P (label) = 1;
12742 emit_insn (gen_set_rip_rex64 (reg10, label));
12743 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12744 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12745 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12746 UNSPEC_GOT);
12747 x = gen_rtx_CONST (Pmode, x);
12748 emit_move_insn (reg11, x);
12749 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12750 x = gen_const_mem (Pmode, x);
12751 emit_move_insn (reg11, x);
12753 else
12754 emit_move_insn (reg11, split_stack_fn_large);
12756 fn = reg11;
12758 argval = ((args_size << 16) << 16) + allocate;
12759 emit_move_insn (reg10, GEN_INT (argval));
12761 else
12763 emit_move_insn (reg10, allocate_rtx);
12764 emit_move_insn (reg11, GEN_INT (args_size));
12765 use_reg (&call_fusage, reg11);
12768 use_reg (&call_fusage, reg10);
12770 else
12772 emit_insn (gen_push (GEN_INT (args_size)));
12773 emit_insn (gen_push (allocate_rtx));
12775 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12776 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12777 NULL_RTX, false);
12778 add_function_usage_to (call_insn, call_fusage);
12780 /* In order to make call/return prediction work right, we now need
12781 to execute a return instruction. See
12782 libgcc/config/i386/morestack.S for the details on how this works.
12784 For flow purposes gcc must not see this as a return
12785 instruction--we need control flow to continue at the subsequent
12786 label. Therefore, we use an unspec. */
12787 gcc_assert (crtl->args.pops_args < 65536);
12788 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12790 /* If we are in 64-bit mode and this function uses a static chain,
12791 we saved %r10 in %rax before calling __morestack. */
12792 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12793 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12794 gen_rtx_REG (word_mode, AX_REG));
12796 /* If this function calls va_start, we need to store a pointer to
12797 the arguments on the old stack, because they may not have been
12798 all copied to the new stack. At this point the old stack can be
12799 found at the frame pointer value used by __morestack, because
12800 __morestack has set that up before calling back to us. Here we
12801 store that pointer in a scratch register, and in
12802 ix86_expand_prologue we store the scratch register in a stack
12803 slot. */
12804 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12806 unsigned int scratch_regno;
12807 rtx frame_reg;
12808 int words;
12810 scratch_regno = split_stack_prologue_scratch_regno ();
12811 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12812 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12814 /* 64-bit:
12815 fp -> old fp value
12816 return address within this function
12817 return address of caller of this function
12818 stack arguments
12819 So we add three words to get to the stack arguments.
12821 32-bit:
12822 fp -> old fp value
12823 return address within this function
12824 first argument to __morestack
12825 second argument to __morestack
12826 return address of caller of this function
12827 stack arguments
12828 So we add five words to get to the stack arguments.
12830 words = TARGET_64BIT ? 3 : 5;
12831 emit_insn (gen_rtx_SET (scratch_reg,
12832 gen_rtx_PLUS (Pmode, frame_reg,
12833 GEN_INT (words * UNITS_PER_WORD))));
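/* For instance, on a 64-bit target (UNITS_PER_WORD == 8, words == 3)
   the insn above sets the scratch register to %rbp + 24: just past the
   saved frame pointer and the two return addresses, which is where the
   stack arguments begin.  */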
12835 varargs_label = gen_label_rtx ();
12836 emit_jump_insn (gen_jump (varargs_label));
12837 JUMP_LABEL (get_last_insn ()) = varargs_label;
12839 emit_barrier ();
12842 emit_label (label);
12843 LABEL_NUSES (label) = 1;
12845 /* If this function calls va_start, we now have to set the scratch
12846 register for the case where we do not call __morestack. In this
12847 case we need to set it based on the stack pointer. */
12848 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12850 emit_insn (gen_rtx_SET (scratch_reg,
12851 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12852 GEN_INT (UNITS_PER_WORD))));
12854 emit_label (varargs_label);
12855 LABEL_NUSES (varargs_label) = 1;
12859 /* We may have to tell the dataflow pass that the split stack prologue
12860 is initializing a scratch register. */
12862 static void
12863 ix86_live_on_entry (bitmap regs)
12865 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12867 gcc_assert (flag_split_stack);
12868 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12872 /* Extract the parts of an RTL expression that is a valid memory address
12873 for an instruction. Return 0 if the structure of the address is
12874 grossly off. Return -1 if the address contains ASHIFT, so it is not
12875 strictly valid, but is still used for computing the length of a lea instruction. */
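/* For illustration (a made-up address, not taken from any particular
   insn), the canonical form

     (plus:SI (plus:SI (mult:SI (reg:SI %ebx) (const_int 4))
                       (reg:SI %eax))
              (const_int 12))

   decomposes into base = %eax, index = %ebx, scale = 4 and disp = 12,
   i.e. the memory operand 12(%eax,%ebx,4).  */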
12878 ix86_decompose_address (rtx addr, struct ix86_address *out)
12880 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12881 rtx base_reg, index_reg;
12882 HOST_WIDE_INT scale = 1;
12883 rtx scale_rtx = NULL_RTX;
12884 rtx tmp;
12885 int retval = 1;
12886 enum ix86_address_seg seg = SEG_DEFAULT;
12888 /* Allow zero-extended SImode addresses,
12889 they will be emitted with addr32 prefix. */
12890 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12892 if (GET_CODE (addr) == ZERO_EXTEND
12893 && GET_MODE (XEXP (addr, 0)) == SImode)
12895 addr = XEXP (addr, 0);
12896 if (CONST_INT_P (addr))
12897 return 0;
12899 else if (GET_CODE (addr) == AND
12900 && const_32bit_mask (XEXP (addr, 1), DImode))
12902 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12903 if (addr == NULL_RTX)
12904 return 0;
12906 if (CONST_INT_P (addr))
12907 return 0;
12911 /* Allow SImode subregs of DImode addresses,
12912 they will be emitted with addr32 prefix. */
12913 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12915 if (SUBREG_P (addr)
12916 && GET_MODE (SUBREG_REG (addr)) == DImode)
12918 addr = SUBREG_REG (addr);
12919 if (CONST_INT_P (addr))
12920 return 0;
12924 if (REG_P (addr))
12925 base = addr;
12926 else if (SUBREG_P (addr))
12928 if (REG_P (SUBREG_REG (addr)))
12929 base = addr;
12930 else
12931 return 0;
12933 else if (GET_CODE (addr) == PLUS)
12935 rtx addends[4], op;
12936 int n = 0, i;
12938 op = addr;
12941 if (n >= 4)
12942 return 0;
12943 addends[n++] = XEXP (op, 1);
12944 op = XEXP (op, 0);
12946 while (GET_CODE (op) == PLUS);
12947 if (n >= 4)
12948 return 0;
12949 addends[n] = op;
12951 for (i = n; i >= 0; --i)
12953 op = addends[i];
12954 switch (GET_CODE (op))
12956 case MULT:
12957 if (index)
12958 return 0;
12959 index = XEXP (op, 0);
12960 scale_rtx = XEXP (op, 1);
12961 break;
12963 case ASHIFT:
12964 if (index)
12965 return 0;
12966 index = XEXP (op, 0);
12967 tmp = XEXP (op, 1);
12968 if (!CONST_INT_P (tmp))
12969 return 0;
12970 scale = INTVAL (tmp);
12971 if ((unsigned HOST_WIDE_INT) scale > 3)
12972 return 0;
12973 scale = 1 << scale;
12974 break;
12976 case ZERO_EXTEND:
12977 op = XEXP (op, 0);
12978 if (GET_CODE (op) != UNSPEC)
12979 return 0;
12980 /* FALLTHRU */
12982 case UNSPEC:
12983 if (XINT (op, 1) == UNSPEC_TP
12984 && TARGET_TLS_DIRECT_SEG_REFS
12985 && seg == SEG_DEFAULT)
12986 seg = DEFAULT_TLS_SEG_REG;
12987 else
12988 return 0;
12989 break;
12991 case SUBREG:
12992 if (!REG_P (SUBREG_REG (op)))
12993 return 0;
12994 /* FALLTHRU */
12996 case REG:
12997 if (!base)
12998 base = op;
12999 else if (!index)
13000 index = op;
13001 else
13002 return 0;
13003 break;
13005 case CONST:
13006 case CONST_INT:
13007 case SYMBOL_REF:
13008 case LABEL_REF:
13009 if (disp)
13010 return 0;
13011 disp = op;
13012 break;
13014 default:
13015 return 0;
13019 else if (GET_CODE (addr) == MULT)
13021 index = XEXP (addr, 0); /* index*scale */
13022 scale_rtx = XEXP (addr, 1);
13024 else if (GET_CODE (addr) == ASHIFT)
13026 /* We're called for lea too, which implements ashift on occasion. */
13027 index = XEXP (addr, 0);
13028 tmp = XEXP (addr, 1);
13029 if (!CONST_INT_P (tmp))
13030 return 0;
13031 scale = INTVAL (tmp);
13032 if ((unsigned HOST_WIDE_INT) scale > 3)
13033 return 0;
13034 scale = 1 << scale;
13035 retval = -1;
13037 else
13038 disp = addr; /* displacement */
13040 if (index)
13042 if (REG_P (index))
13044 else if (SUBREG_P (index)
13045 && REG_P (SUBREG_REG (index)))
13047 else
13048 return 0;
13051 /* Extract the integral value of scale. */
13052 if (scale_rtx)
13054 if (!CONST_INT_P (scale_rtx))
13055 return 0;
13056 scale = INTVAL (scale_rtx);
13059 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
13060 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
13062 /* Avoid useless 0 displacement. */
13063 if (disp == const0_rtx && (base || index))
13064 disp = NULL_RTX;
13066 /* Allow arg pointer and stack pointer as index if there is no scaling. */
13067 if (base_reg && index_reg && scale == 1
13068 && (index_reg == arg_pointer_rtx
13069 || index_reg == frame_pointer_rtx
13070 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
13072 std::swap (base, index);
13073 std::swap (base_reg, index_reg);
13076 /* Special case: %ebp cannot be encoded as a base without a displacement.
13077 Similarly %r13. */
13078 if (!disp
13079 && base_reg
13080 && (base_reg == hard_frame_pointer_rtx
13081 || base_reg == frame_pointer_rtx
13082 || base_reg == arg_pointer_rtx
13083 || (REG_P (base_reg)
13084 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
13085 || REGNO (base_reg) == R13_REG))))
13086 disp = const0_rtx;
13088 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
13089 Avoid this by transforming to [%esi+0].
13090 Reload calls address legitimization without cfun defined, so we need
13091 to test cfun for being non-NULL. */
13092 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
13093 && base_reg && !index_reg && !disp
13094 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
13095 disp = const0_rtx;
13097 /* Special case: encode reg+reg instead of reg*2. */
13098 if (!base && index && scale == 2)
13099 base = index, base_reg = index_reg, scale = 1;
13101 /* Special case: scaling cannot be encoded without base or displacement. */
13102 if (!base && !disp && index && scale != 1)
13103 disp = const0_rtx;
13105 out->base = base;
13106 out->index = index;
13107 out->disp = disp;
13108 out->scale = scale;
13109 out->seg = seg;
13111 return retval;
13114 /* Return cost of the memory address x.
13115 For i386, it is better to use a complex address than let gcc copy
13116 the address into a reg and make a new pseudo. But not if the address
13117 requires two regs - that would mean more pseudos with longer
13118 lifetimes. */
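/* A rough sketch of the resulting preference (register numbers are
   hypothetical pseudos): (plus (reg 90) (const_int 4)) costs 2 below,
   while (plus (reg 90) (reg 91)) costs 3, so forming the address with
   a constant displacement is considered cheaper than using two
   registers.  */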
13119 static int
13120 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
13122 struct ix86_address parts;
13123 int cost = 1;
13124 int ok = ix86_decompose_address (x, &parts);
13126 gcc_assert (ok);
13128 if (parts.base && SUBREG_P (parts.base))
13129 parts.base = SUBREG_REG (parts.base);
13130 if (parts.index && SUBREG_P (parts.index))
13131 parts.index = SUBREG_REG (parts.index);
13133 /* Attempt to minimize number of registers in the address by increasing
13134 address cost for each used register. We don't increase address cost
13135 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
13136 is not invariant itself it most likely means that base or index is not
13137 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
13138 which is not profitable for x86. */
13139 if (parts.base
13140 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
13141 && (current_pass->type == GIMPLE_PASS
13142 || !pic_offset_table_rtx
13143 || !REG_P (parts.base)
13144 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
13145 cost++;
13147 if (parts.index
13148 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
13149 && (current_pass->type == GIMPLE_PASS
13150 || !pic_offset_table_rtx
13151 || !REG_P (parts.index)
13152 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
13153 cost++;
13155 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
13156 since its predecode logic can't detect the length of such instructions
13157 and decoding degenerates to vector decoding. Increase the cost of such
13158 addresses here. The penalty is at least 2 cycles. It may be worthwhile
13159 to split such addresses or even to refuse them altogether.
13161 The following addressing modes are affected:
13162 [base+scale*index]
13163 [scale*index+disp]
13164 [base+index]
13166 The first and last cases may be avoidable by explicitly coding a zero
13167 displacement in the memory address, but I don't have an AMD K6 machine
13168 handy to check this theory. */
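/* As an illustrative pair (not emitted by this function): on K6,
   "mov (%eax,%ebx), %ecx" is encoded with a 00_xxx_100b ModR/M byte
   and hits the penalty, while the same access encoded with an explicit
   zero displacement, "mov 0(%eax,%ebx), %ecx", uses a 01_xxx_100b
   ModR/M byte plus a disp8 and avoids it.  */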
13170 if (TARGET_K6
13171 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13172 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13173 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13174 cost += 10;
13176 return cost;
13179 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
13180 this is used to form addresses to local data when -fPIC is in
13181 use. */
13183 static bool
13184 darwin_local_data_pic (rtx disp)
13186 return (GET_CODE (disp) == UNSPEC
13187 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13190 /* Determine if a given RTX is a valid constant. We already know this
13191 satisfies CONSTANT_P. */
13193 static bool
13194 ix86_legitimate_constant_p (machine_mode, rtx x)
13196 /* Pointer bounds constants are not valid. */
13197 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13198 return false;
13200 switch (GET_CODE (x))
13202 case CONST:
13203 x = XEXP (x, 0);
13205 if (GET_CODE (x) == PLUS)
13207 if (!CONST_INT_P (XEXP (x, 1)))
13208 return false;
13209 x = XEXP (x, 0);
13212 if (TARGET_MACHO && darwin_local_data_pic (x))
13213 return true;
13215 /* Only some unspecs are valid as "constants". */
13216 if (GET_CODE (x) == UNSPEC)
13217 switch (XINT (x, 1))
13219 case UNSPEC_GOT:
13220 case UNSPEC_GOTOFF:
13221 case UNSPEC_PLTOFF:
13222 return TARGET_64BIT;
13223 case UNSPEC_TPOFF:
13224 case UNSPEC_NTPOFF:
13225 x = XVECEXP (x, 0, 0);
13226 return (GET_CODE (x) == SYMBOL_REF
13227 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13228 case UNSPEC_DTPOFF:
13229 x = XVECEXP (x, 0, 0);
13230 return (GET_CODE (x) == SYMBOL_REF
13231 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13232 default:
13233 return false;
13236 /* We must have drilled down to a symbol. */
13237 if (GET_CODE (x) == LABEL_REF)
13238 return true;
13239 if (GET_CODE (x) != SYMBOL_REF)
13240 return false;
13241 /* FALLTHRU */
13243 case SYMBOL_REF:
13244 /* TLS symbols are never valid. */
13245 if (SYMBOL_REF_TLS_MODEL (x))
13246 return false;
13248 /* DLLIMPORT symbols are never valid. */
13249 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13250 && SYMBOL_REF_DLLIMPORT_P (x))
13251 return false;
13253 #if TARGET_MACHO
13254 /* mdynamic-no-pic */
13255 if (MACHO_DYNAMIC_NO_PIC_P)
13256 return machopic_symbol_defined_p (x);
13257 #endif
13258 break;
13260 case CONST_WIDE_INT:
13261 if (!TARGET_64BIT && !standard_sse_constant_p (x))
13262 return false;
13263 break;
13265 case CONST_VECTOR:
13266 if (!standard_sse_constant_p (x))
13267 return false;
13269 default:
13270 break;
13273 /* Otherwise we handle everything else in the move patterns. */
13274 return true;
13277 /* Determine if it's legal to put X into the constant pool. This
13278 is not possible for the address of thread-local symbols, which
13279 is checked above. */
13281 static bool
13282 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13284 /* We can always put integral constants and vectors in memory. */
13285 switch (GET_CODE (x))
13287 case CONST_INT:
13288 case CONST_WIDE_INT:
13289 case CONST_DOUBLE:
13290 case CONST_VECTOR:
13291 return false;
13293 default:
13294 break;
13296 return !ix86_legitimate_constant_p (mode, x);
13299 /* Return true if the symbol is marked as dllimport or as a stub variable,
13300 otherwise false. */
13302 static bool
13303 is_imported_p (rtx x)
13305 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13306 || GET_CODE (x) != SYMBOL_REF)
13307 return false;
13309 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13313 /* Nonzero if the constant value X is a legitimate general operand
13314 when generating PIC code. It is given that flag_pic is on and
13315 that X satisfies CONSTANT_P. */
13317 bool
13318 legitimate_pic_operand_p (rtx x)
13320 rtx inner;
13322 switch (GET_CODE (x))
13324 case CONST:
13325 inner = XEXP (x, 0);
13326 if (GET_CODE (inner) == PLUS
13327 && CONST_INT_P (XEXP (inner, 1)))
13328 inner = XEXP (inner, 0);
13330 /* Only some unspecs are valid as "constants". */
13331 if (GET_CODE (inner) == UNSPEC)
13332 switch (XINT (inner, 1))
13334 case UNSPEC_GOT:
13335 case UNSPEC_GOTOFF:
13336 case UNSPEC_PLTOFF:
13337 return TARGET_64BIT;
13338 case UNSPEC_TPOFF:
13339 x = XVECEXP (inner, 0, 0);
13340 return (GET_CODE (x) == SYMBOL_REF
13341 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13342 case UNSPEC_MACHOPIC_OFFSET:
13343 return legitimate_pic_address_disp_p (x);
13344 default:
13345 return false;
13347 /* FALLTHRU */
13349 case SYMBOL_REF:
13350 case LABEL_REF:
13351 return legitimate_pic_address_disp_p (x);
13353 default:
13354 return true;
13358 /* Determine if a given CONST RTX is a valid memory displacement
13359 in PIC mode. */
13361 bool
13362 legitimate_pic_address_disp_p (rtx disp)
13364 bool saw_plus;
13366 /* In 64bit mode we can allow direct addresses of symbols and labels
13367 when they are not dynamic symbols. */
13368 if (TARGET_64BIT)
13370 rtx op0 = disp, op1;
13372 switch (GET_CODE (disp))
13374 case LABEL_REF:
13375 return true;
13377 case CONST:
13378 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13379 break;
13380 op0 = XEXP (XEXP (disp, 0), 0);
13381 op1 = XEXP (XEXP (disp, 0), 1);
13382 if (!CONST_INT_P (op1)
13383 || INTVAL (op1) >= 16*1024*1024
13384 || INTVAL (op1) < -16*1024*1024)
13385 break;
13386 if (GET_CODE (op0) == LABEL_REF)
13387 return true;
13388 if (GET_CODE (op0) == CONST
13389 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13390 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13391 return true;
13392 if (GET_CODE (op0) == UNSPEC
13393 && XINT (op0, 1) == UNSPEC_PCREL)
13394 return true;
13395 if (GET_CODE (op0) != SYMBOL_REF)
13396 break;
13397 /* FALLTHRU */
13399 case SYMBOL_REF:
13400 /* TLS references should always be enclosed in UNSPEC.
13401 A dllimported symbol always needs to be resolved. */
13402 if (SYMBOL_REF_TLS_MODEL (op0)
13403 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13404 return false;
13406 if (TARGET_PECOFF)
13408 if (is_imported_p (op0))
13409 return true;
13411 if (SYMBOL_REF_FAR_ADDR_P (op0)
13412 || !SYMBOL_REF_LOCAL_P (op0))
13413 break;
13415 /* Function symbols need to be resolved only for
13416 the large model.
13417 For the small model we don't need to resolve anything
13418 here. */
13419 if ((ix86_cmodel != CM_LARGE_PIC
13420 && SYMBOL_REF_FUNCTION_P (op0))
13421 || ix86_cmodel == CM_SMALL_PIC)
13422 return true;
13423 /* Non-external symbols don't need to be resolved for
13424 the large and medium models. */
13425 if ((ix86_cmodel == CM_LARGE_PIC
13426 || ix86_cmodel == CM_MEDIUM_PIC)
13427 && !SYMBOL_REF_EXTERNAL_P (op0))
13428 return true;
13430 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13431 && (SYMBOL_REF_LOCAL_P (op0)
13432 || (HAVE_LD_PIE_COPYRELOC
13433 && flag_pie
13434 && !SYMBOL_REF_WEAK (op0)
13435 && !SYMBOL_REF_FUNCTION_P (op0)))
13436 && ix86_cmodel != CM_LARGE_PIC)
13437 return true;
13438 break;
13440 default:
13441 break;
13444 if (GET_CODE (disp) != CONST)
13445 return false;
13446 disp = XEXP (disp, 0);
13448 if (TARGET_64BIT)
13450 /* It is unsafe to allow PLUS expressions. This limits the allowed distance
13451 of GOT table references. We should not need these anyway. */
13452 if (GET_CODE (disp) != UNSPEC
13453 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13454 && XINT (disp, 1) != UNSPEC_GOTOFF
13455 && XINT (disp, 1) != UNSPEC_PCREL
13456 && XINT (disp, 1) != UNSPEC_PLTOFF))
13457 return false;
13459 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13460 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13461 return false;
13462 return true;
13465 saw_plus = false;
13466 if (GET_CODE (disp) == PLUS)
13468 if (!CONST_INT_P (XEXP (disp, 1)))
13469 return false;
13470 disp = XEXP (disp, 0);
13471 saw_plus = true;
13474 if (TARGET_MACHO && darwin_local_data_pic (disp))
13475 return true;
13477 if (GET_CODE (disp) != UNSPEC)
13478 return false;
13480 switch (XINT (disp, 1))
13482 case UNSPEC_GOT:
13483 if (saw_plus)
13484 return false;
13485 /* We need to check for both symbols and labels because VxWorks loads
13486 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13487 details. */
13488 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13489 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13490 case UNSPEC_GOTOFF:
13491 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13492 The ABI also specifies a 32bit relocation, but we don't produce it in
13493 the small PIC model at all. */
13494 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13495 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13496 && !TARGET_64BIT)
13497 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13498 return false;
13499 case UNSPEC_GOTTPOFF:
13500 case UNSPEC_GOTNTPOFF:
13501 case UNSPEC_INDNTPOFF:
13502 if (saw_plus)
13503 return false;
13504 disp = XVECEXP (disp, 0, 0);
13505 return (GET_CODE (disp) == SYMBOL_REF
13506 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13507 case UNSPEC_NTPOFF:
13508 disp = XVECEXP (disp, 0, 0);
13509 return (GET_CODE (disp) == SYMBOL_REF
13510 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13511 case UNSPEC_DTPOFF:
13512 disp = XVECEXP (disp, 0, 0);
13513 return (GET_CODE (disp) == SYMBOL_REF
13514 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13517 return false;
13520 /* Determine if op is a suitable RTX for an address register.
13521 Return the naked register if a register or a register subreg is
13522 found, otherwise return NULL_RTX. */
13524 static rtx
13525 ix86_validate_address_register (rtx op)
13527 machine_mode mode = GET_MODE (op);
13529 /* Only SImode or DImode registers can form the address. */
13530 if (mode != SImode && mode != DImode)
13531 return NULL_RTX;
13533 if (REG_P (op))
13534 return op;
13535 else if (SUBREG_P (op))
13537 rtx reg = SUBREG_REG (op);
13539 if (!REG_P (reg))
13540 return NULL_RTX;
13542 mode = GET_MODE (reg);
13544 /* Don't allow SUBREGs that span more than a word. It can
13545 lead to spill failures when the register is one word out
13546 of a two word structure. */
13547 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13548 return NULL_RTX;
13550 /* Allow only SUBREGs of non-eliminable hard registers. */
13551 if (register_no_elim_operand (reg, mode))
13552 return reg;
13555 /* Op is not a register. */
13556 return NULL_RTX;
13559 /* Recognizes RTL expressions that are valid memory addresses for an
13560 instruction. The MODE argument is the machine mode for the MEM
13561 expression that wants to use this address.
13563 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13564 convert common non-canonical forms to canonical form so that they will
13565 be recognized. */
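/* For example (hypothetical RTL), (plus:DI (reg:DI %rax)
   (mult:DI (reg:DI %rbx) (const_int 8))) is accepted, whereas the same
   address with (const_int 3) as the multiplier is rejected below,
   because 3 is not one of the encodable scales 1, 2, 4 and 8.  */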
13567 static bool
13568 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13570 struct ix86_address parts;
13571 rtx base, index, disp;
13572 HOST_WIDE_INT scale;
13573 enum ix86_address_seg seg;
13575 if (ix86_decompose_address (addr, &parts) <= 0)
13576 /* Decomposition failed. */
13577 return false;
13579 base = parts.base;
13580 index = parts.index;
13581 disp = parts.disp;
13582 scale = parts.scale;
13583 seg = parts.seg;
13585 /* Validate base register. */
13586 if (base)
13588 rtx reg = ix86_validate_address_register (base);
13590 if (reg == NULL_RTX)
13591 return false;
13593 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13594 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13595 /* Base is not valid. */
13596 return false;
13599 /* Validate index register. */
13600 if (index)
13602 rtx reg = ix86_validate_address_register (index);
13604 if (reg == NULL_RTX)
13605 return false;
13607 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13608 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13609 /* Index is not valid. */
13610 return false;
13613 /* Index and base should have the same mode. */
13614 if (base && index
13615 && GET_MODE (base) != GET_MODE (index))
13616 return false;
13618 /* Address override works only on the (%reg) part of %fs:(%reg). */
13619 if (seg != SEG_DEFAULT
13620 && ((base && GET_MODE (base) != word_mode)
13621 || (index && GET_MODE (index) != word_mode)))
13622 return false;
13624 /* Validate scale factor. */
13625 if (scale != 1)
13627 if (!index)
13628 /* Scale without index. */
13629 return false;
13631 if (scale != 2 && scale != 4 && scale != 8)
13632 /* Scale is not a valid multiplier. */
13633 return false;
13636 /* Validate displacement. */
13637 if (disp)
13639 if (GET_CODE (disp) == CONST
13640 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13641 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13642 switch (XINT (XEXP (disp, 0), 1))
13644 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
13645 used. The ABI also specifies 32bit relocations, but we don't produce
13646 them at all and use IP-relative addressing instead. */
13647 case UNSPEC_GOT:
13648 case UNSPEC_GOTOFF:
13649 gcc_assert (flag_pic);
13650 if (!TARGET_64BIT)
13651 goto is_legitimate_pic;
13653 /* 64bit address unspec. */
13654 return false;
13656 case UNSPEC_GOTPCREL:
13657 case UNSPEC_PCREL:
13658 gcc_assert (flag_pic);
13659 goto is_legitimate_pic;
13661 case UNSPEC_GOTTPOFF:
13662 case UNSPEC_GOTNTPOFF:
13663 case UNSPEC_INDNTPOFF:
13664 case UNSPEC_NTPOFF:
13665 case UNSPEC_DTPOFF:
13666 break;
13668 case UNSPEC_STACK_CHECK:
13669 gcc_assert (flag_split_stack);
13670 break;
13672 default:
13673 /* Invalid address unspec. */
13674 return false;
13677 else if (SYMBOLIC_CONST (disp)
13678 && (flag_pic
13679 || (TARGET_MACHO
13680 #if TARGET_MACHO
13681 && MACHOPIC_INDIRECT
13682 && !machopic_operand_p (disp)
13683 #endif
13687 is_legitimate_pic:
13688 if (TARGET_64BIT && (index || base))
13690 /* foo@dtpoff(%rX) is ok. */
13691 if (GET_CODE (disp) != CONST
13692 || GET_CODE (XEXP (disp, 0)) != PLUS
13693 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13694 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13695 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13696 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13697 /* Non-constant pic memory reference. */
13698 return false;
13700 else if ((!TARGET_MACHO || flag_pic)
13701 && ! legitimate_pic_address_disp_p (disp))
13702 /* Displacement is an invalid pic construct. */
13703 return false;
13704 #if TARGET_MACHO
13705 else if (MACHO_DYNAMIC_NO_PIC_P
13706 && !ix86_legitimate_constant_p (Pmode, disp))
13707 /* displacement must be referenced via non_lazy_pointer */
13708 return false;
13709 #endif
13711 /* This code used to verify that a symbolic pic displacement
13712 includes the pic_offset_table_rtx register.
13714 While this is a good idea, unfortunately these constructs may
13715 be created by the "adds using lea" optimization for incorrect
13716 code like:
13718 int a;
13719 int foo(int i)
13721 return *(&a+i);
13724 This code is nonsensical, but results in addressing the
13725 GOT table with a pic_offset_table_rtx base. We can't
13726 just refuse it easily, since it gets matched by the
13727 "addsi3" pattern, which later gets split to lea when the
13728 output register differs from the input. While this
13729 could be handled by a separate addsi pattern for this case
13730 that never results in lea, disabling this test seems to be
13731 the easier and correct fix for the crash. */
13733 else if (GET_CODE (disp) != LABEL_REF
13734 && !CONST_INT_P (disp)
13735 && (GET_CODE (disp) != CONST
13736 || !ix86_legitimate_constant_p (Pmode, disp))
13737 && (GET_CODE (disp) != SYMBOL_REF
13738 || !ix86_legitimate_constant_p (Pmode, disp)))
13739 /* Displacement is not constant. */
13740 return false;
13741 else if (TARGET_64BIT
13742 && !x86_64_immediate_operand (disp, VOIDmode))
13743 /* Displacement is out of range. */
13744 return false;
13745 /* In x32 mode, constant addresses are sign extended to 64bit, so
13746 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13747 else if (TARGET_X32 && !(index || base)
13748 && CONST_INT_P (disp)
13749 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13750 return false;
13753 /* Everything looks valid. */
13754 return true;
13757 /* Determine if a given RTX is a valid constant address. */
13759 bool
13760 constant_address_p (rtx x)
13762 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13765 /* Return a unique alias set for the GOT. */
13767 static alias_set_type
13768 ix86_GOT_alias_set (void)
13770 static alias_set_type set = -1;
13771 if (set == -1)
13772 set = new_alias_set ();
13773 return set;
13776 /* Return a legitimate reference for ORIG (an address) using the
13777 register REG. If REG is 0, a new pseudo is generated.
13779 There are two types of references that must be handled:
13781 1. Global data references must load the address from the GOT, via
13782 the PIC reg. An insn is emitted to do this load, and the reg is
13783 returned.
13785 2. Static data references, constant pool addresses, and code labels
13786 compute the address as an offset from the GOT, whose base is in
13787 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13788 differentiate them from global data objects. The returned
13789 address is the PIC reg + an unspec constant.
13791 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13792 reg also appears in the address. */
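/* As a rough illustration of the two cases on 32-bit ELF (the assembly
   is schematic, not the literal output of this function):

     1. global data:   movl  foo@GOT(%ebx), %reg     # load address from the GOT
     2. static data:   leal  bar@GOTOFF(%ebx), %reg  # PIC reg plus offset

   where %ebx holds the PIC register and foo/bar are placeholder
   symbols.  */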
13794 static rtx
13795 legitimize_pic_address (rtx orig, rtx reg)
13797 rtx addr = orig;
13798 rtx new_rtx = orig;
13800 #if TARGET_MACHO
13801 if (TARGET_MACHO && !TARGET_64BIT)
13803 if (reg == 0)
13804 reg = gen_reg_rtx (Pmode);
13805 /* Use the generic Mach-O PIC machinery. */
13806 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13808 #endif
13810 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13812 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13813 if (tmp)
13814 return tmp;
13817 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13818 new_rtx = addr;
13819 else if (TARGET_64BIT && !TARGET_PECOFF
13820 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13822 rtx tmpreg;
13823 /* This symbol may be referenced via a displacement from the PIC
13824 base address (@GOTOFF). */
13826 if (GET_CODE (addr) == CONST)
13827 addr = XEXP (addr, 0);
13828 if (GET_CODE (addr) == PLUS)
13830 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13831 UNSPEC_GOTOFF);
13832 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13834 else
13835 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13836 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13837 if (!reg)
13838 tmpreg = gen_reg_rtx (Pmode);
13839 else
13840 tmpreg = reg;
13841 emit_move_insn (tmpreg, new_rtx);
13843 if (reg != 0)
13845 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13846 tmpreg, 1, OPTAB_DIRECT);
13847 new_rtx = reg;
13849 else
13850 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13852 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13854 /* This symbol may be referenced via a displacement from the PIC
13855 base address (@GOTOFF). */
13857 if (GET_CODE (addr) == CONST)
13858 addr = XEXP (addr, 0);
13859 if (GET_CODE (addr) == PLUS)
13861 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13862 UNSPEC_GOTOFF);
13863 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13865 else
13866 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13867 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13868 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13870 if (reg != 0)
13872 emit_move_insn (reg, new_rtx);
13873 new_rtx = reg;
13876 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13877 /* We can't use @GOTOFF for text labels on VxWorks;
13878 see gotoff_operand. */
13879 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13881 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13882 if (tmp)
13883 return tmp;
13885 /* For x64 PE-COFF there is no GOT table, so we use the address
13886 directly. */
13887 if (TARGET_64BIT && TARGET_PECOFF)
13889 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13890 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13892 if (reg == 0)
13893 reg = gen_reg_rtx (Pmode);
13894 emit_move_insn (reg, new_rtx);
13895 new_rtx = reg;
13897 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13899 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13900 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13901 new_rtx = gen_const_mem (Pmode, new_rtx);
13902 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13904 if (reg == 0)
13905 reg = gen_reg_rtx (Pmode);
13906 /* Use gen_movsi directly, otherwise the address is loaded
13907 into a register for CSE. We don't want to CSE these addresses;
13908 instead we CSE addresses from the GOT table, so skip this. */
13909 emit_insn (gen_movsi (reg, new_rtx));
13910 new_rtx = reg;
13912 else
13914 /* This symbol must be referenced via a load from the
13915 Global Offset Table (@GOT). */
13917 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13918 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13919 if (TARGET_64BIT)
13920 new_rtx = force_reg (Pmode, new_rtx);
13921 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13922 new_rtx = gen_const_mem (Pmode, new_rtx);
13923 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13925 if (reg == 0)
13926 reg = gen_reg_rtx (Pmode);
13927 emit_move_insn (reg, new_rtx);
13928 new_rtx = reg;
13931 else
13933 if (CONST_INT_P (addr)
13934 && !x86_64_immediate_operand (addr, VOIDmode))
13936 if (reg)
13938 emit_move_insn (reg, addr);
13939 new_rtx = reg;
13941 else
13942 new_rtx = force_reg (Pmode, addr);
13944 else if (GET_CODE (addr) == CONST)
13946 addr = XEXP (addr, 0);
13948 /* We must match stuff we generate before. Assume the only
13949 unspecs that can get here are ours. Not that we could do
13950 anything with them anyway.... */
13951 if (GET_CODE (addr) == UNSPEC
13952 || (GET_CODE (addr) == PLUS
13953 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13954 return orig;
13955 gcc_assert (GET_CODE (addr) == PLUS);
13957 if (GET_CODE (addr) == PLUS)
13959 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13961 /* Check first to see if this is a constant offset from a @GOTOFF
13962 symbol reference. */
13963 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13964 && CONST_INT_P (op1))
13966 if (!TARGET_64BIT)
13968 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13969 UNSPEC_GOTOFF);
13970 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13971 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13972 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13974 if (reg != 0)
13976 emit_move_insn (reg, new_rtx);
13977 new_rtx = reg;
13980 else
13982 if (INTVAL (op1) < -16*1024*1024
13983 || INTVAL (op1) >= 16*1024*1024)
13985 if (!x86_64_immediate_operand (op1, Pmode))
13986 op1 = force_reg (Pmode, op1);
13987 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13991 else
13993 rtx base = legitimize_pic_address (op0, reg);
13994 machine_mode mode = GET_MODE (base);
13995 new_rtx
13996 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13998 if (CONST_INT_P (new_rtx))
14000 if (INTVAL (new_rtx) < -16*1024*1024
14001 || INTVAL (new_rtx) >= 16*1024*1024)
14003 if (!x86_64_immediate_operand (new_rtx, mode))
14004 new_rtx = force_reg (mode, new_rtx);
14005 new_rtx
14006 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
14008 else
14009 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
14011 else
14013 /* For %rip addressing, we have to use just disp32, with
14014 neither base nor index. */
14015 if (TARGET_64BIT
14016 && (GET_CODE (base) == SYMBOL_REF
14017 || GET_CODE (base) == LABEL_REF))
14018 base = force_reg (mode, base);
14019 if (GET_CODE (new_rtx) == PLUS
14020 && CONSTANT_P (XEXP (new_rtx, 1)))
14022 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
14023 new_rtx = XEXP (new_rtx, 1);
14025 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
14030 return new_rtx;
14033 /* Load the thread pointer. If TO_REG is true, force it into a register. */
14035 static rtx
14036 get_thread_pointer (machine_mode tp_mode, bool to_reg)
14038 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
14040 if (GET_MODE (tp) != tp_mode)
14042 gcc_assert (GET_MODE (tp) == SImode);
14043 gcc_assert (tp_mode == DImode);
14045 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
14048 if (to_reg)
14049 tp = copy_to_mode_reg (tp_mode, tp);
14051 return tp;
14054 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14056 static GTY(()) rtx ix86_tls_symbol;
14058 static rtx
14059 ix86_tls_get_addr (void)
14061 if (!ix86_tls_symbol)
14063 const char *sym
14064 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
14065 ? "___tls_get_addr" : "__tls_get_addr");
14067 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
14070 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
14072 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
14073 UNSPEC_PLTOFF);
14074 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
14075 gen_rtx_CONST (Pmode, unspec));
14078 return ix86_tls_symbol;
14081 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14083 static GTY(()) rtx ix86_tls_module_base_symbol;
14086 ix86_tls_module_base (void)
14088 if (!ix86_tls_module_base_symbol)
14090 ix86_tls_module_base_symbol
14091 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
14093 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14094 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14097 return ix86_tls_module_base_symbol;
14100 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
14101 false if we expect this to be used for a memory address and true if
14102 we expect to load the address into a register. */
14104 static rtx
14105 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
14107 rtx dest, base, off;
14108 rtx pic = NULL_RTX, tp = NULL_RTX;
14109 machine_mode tp_mode = Pmode;
14110 int type;
14112 /* Fall back to the global dynamic model if the toolchain cannot support
14113 local dynamic. */
14114 if (TARGET_SUN_TLS && !TARGET_64BIT
14115 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
14116 && model == TLS_MODEL_LOCAL_DYNAMIC)
14117 model = TLS_MODEL_GLOBAL_DYNAMIC;
14119 switch (model)
14121 case TLS_MODEL_GLOBAL_DYNAMIC:
14122 dest = gen_reg_rtx (Pmode);
14124 if (!TARGET_64BIT)
14126 if (flag_pic && !TARGET_PECOFF)
14127 pic = pic_offset_table_rtx;
14128 else
14130 pic = gen_reg_rtx (Pmode);
14131 emit_insn (gen_set_got (pic));
14135 if (TARGET_GNU2_TLS)
14137 if (TARGET_64BIT)
14138 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14139 else
14140 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14142 tp = get_thread_pointer (Pmode, true);
14143 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14145 if (GET_MODE (x) != Pmode)
14146 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14148 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14150 else
14152 rtx caddr = ix86_tls_get_addr ();
14154 if (TARGET_64BIT)
14156 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14157 rtx_insn *insns;
14159 start_sequence ();
14160 emit_call_insn
14161 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14162 insns = get_insns ();
14163 end_sequence ();
14165 if (GET_MODE (x) != Pmode)
14166 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14168 RTL_CONST_CALL_P (insns) = 1;
14169 emit_libcall_block (insns, dest, rax, x);
14171 else
14172 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14174 break;
14176 case TLS_MODEL_LOCAL_DYNAMIC:
14177 base = gen_reg_rtx (Pmode);
14179 if (!TARGET_64BIT)
14181 if (flag_pic)
14182 pic = pic_offset_table_rtx;
14183 else
14185 pic = gen_reg_rtx (Pmode);
14186 emit_insn (gen_set_got (pic));
14190 if (TARGET_GNU2_TLS)
14192 rtx tmp = ix86_tls_module_base ();
14194 if (TARGET_64BIT)
14195 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14196 else
14197 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14199 tp = get_thread_pointer (Pmode, true);
14200 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14201 gen_rtx_MINUS (Pmode, tmp, tp));
14203 else
14205 rtx caddr = ix86_tls_get_addr ();
14207 if (TARGET_64BIT)
14209 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14210 rtx_insn *insns;
14211 rtx eqv;
14213 start_sequence ();
14214 emit_call_insn
14215 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14216 insns = get_insns ();
14217 end_sequence ();
14219 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14220 share the LD_BASE result with other LD model accesses. */
14221 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14222 UNSPEC_TLS_LD_BASE);
14224 RTL_CONST_CALL_P (insns) = 1;
14225 emit_libcall_block (insns, base, rax, eqv);
14227 else
14228 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14231 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14232 off = gen_rtx_CONST (Pmode, off);
14234 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14236 if (TARGET_GNU2_TLS)
14238 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14240 if (GET_MODE (x) != Pmode)
14241 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14243 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14245 break;
14247 case TLS_MODEL_INITIAL_EXEC:
14248 if (TARGET_64BIT)
14250 if (TARGET_SUN_TLS && !TARGET_X32)
14252 /* The Sun linker took the AMD64 TLS spec literally
14253 and can only handle %rax as the destination of the
14254 initial-exec code sequence. */
14256 dest = gen_reg_rtx (DImode);
14257 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14258 return dest;
14261 /* Generate DImode references to avoid %fs:(%reg32)
14262 problems and linker IE->LE relaxation bug. */
14263 tp_mode = DImode;
14264 pic = NULL;
14265 type = UNSPEC_GOTNTPOFF;
14267 else if (flag_pic)
14269 pic = pic_offset_table_rtx;
14270 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14272 else if (!TARGET_ANY_GNU_TLS)
14274 pic = gen_reg_rtx (Pmode);
14275 emit_insn (gen_set_got (pic));
14276 type = UNSPEC_GOTTPOFF;
14278 else
14280 pic = NULL;
14281 type = UNSPEC_INDNTPOFF;
14284 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14285 off = gen_rtx_CONST (tp_mode, off);
14286 if (pic)
14287 off = gen_rtx_PLUS (tp_mode, pic, off);
14288 off = gen_const_mem (tp_mode, off);
14289 set_mem_alias_set (off, ix86_GOT_alias_set ());
14291 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14293 base = get_thread_pointer (tp_mode,
14294 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14295 off = force_reg (tp_mode, off);
14296 return gen_rtx_PLUS (tp_mode, base, off);
14298 else
14300 base = get_thread_pointer (Pmode, true);
14301 dest = gen_reg_rtx (Pmode);
14302 emit_insn (ix86_gen_sub3 (dest, base, off));
14304 break;
14306 case TLS_MODEL_LOCAL_EXEC:
14307 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14308 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14309 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14310 off = gen_rtx_CONST (Pmode, off);
14312 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14314 base = get_thread_pointer (Pmode,
14315 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14316 return gen_rtx_PLUS (Pmode, base, off);
14318 else
14320 base = get_thread_pointer (Pmode, true);
14321 dest = gen_reg_rtx (Pmode);
14322 emit_insn (ix86_gen_sub3 (dest, base, off));
14324 break;
14326 default:
14327 gcc_unreachable ();
14330 return dest;
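/* Editorial sketch, not GCC code: the four TLS models handled above
   correspond to source-level accesses like the following; -ftls-model
   (or the tls_model attribute) and -fpic/-fPIC select among them, and
   the linker may later relax e.g. initial-exec to local-exec.  */
extern __thread int tls_extern_counter;        /* typically GD or IE */
static __thread int tls_static_counter;        /* typically LD or LE */

static int
bump_tls_counters_sketch (void)
{
  return ++tls_extern_counter + ++tls_static_counter;
}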
14333 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14334 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14335 unique refptr-DECL symbol corresponding to symbol DECL. */
14337 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
14339 static inline hashval_t hash (tree_map *m) { return m->hash; }
14340 static inline bool
14341 equal (tree_map *a, tree_map *b)
14343 return a->base.from == b->base.from;
14346 static int
14347 keep_cache_entry (tree_map *&m)
14349 return ggc_marked_p (m->base.from);
14353 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14355 static tree
14356 get_dllimport_decl (tree decl, bool beimport)
14358 struct tree_map *h, in;
14359 const char *name;
14360 const char *prefix;
14361 size_t namelen, prefixlen;
14362 char *imp_name;
14363 tree to;
14364 rtx rtl;
14366 if (!dllimport_map)
14367 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14369 in.hash = htab_hash_pointer (decl);
14370 in.base.from = decl;
14371 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14372 h = *loc;
14373 if (h)
14374 return h->to;
14376 *loc = h = ggc_alloc<tree_map> ();
14377 h->hash = in.hash;
14378 h->base.from = decl;
14379 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14380 VAR_DECL, NULL, ptr_type_node);
14381 DECL_ARTIFICIAL (to) = 1;
14382 DECL_IGNORED_P (to) = 1;
14383 DECL_EXTERNAL (to) = 1;
14384 TREE_READONLY (to) = 1;
14386 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14387 name = targetm.strip_name_encoding (name);
14388 if (beimport)
14389 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14390 ? "*__imp_" : "*__imp__";
14391 else
14392 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14393 namelen = strlen (name);
14394 prefixlen = strlen (prefix);
14395 imp_name = (char *) alloca (namelen + prefixlen + 1);
14396 memcpy (imp_name, prefix, prefixlen);
14397 memcpy (imp_name + prefixlen, name, namelen + 1);
14399 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14400 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14401 SET_SYMBOL_REF_DECL (rtl, to);
14402 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14403 if (!beimport)
14405 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14406 #ifdef SUB_TARGET_RECORD_STUB
14407 SUB_TARGET_RECORD_STUB (name);
14408 #endif
14411 rtl = gen_const_mem (Pmode, rtl);
14412 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14414 SET_DECL_RTL (to, rtl);
14415 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14417 return to;
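/* Editorial sketch of the name mangling above, using a hypothetical
   standalone helper: the "*__imp_" vs "*__imp__" choice depends on
   whether the target already prepends a user-label underscore (the
   fastcall '@' prefix case is omitted here for brevity).  */
#include <stdio.h>

static void
make_import_name_sketch (char *buf, size_t bufsize,
                         const char *name, int have_user_label_prefix)
{
  const char *prefix = have_user_label_prefix ? "*__imp__" : "*__imp_";
  snprintf (buf, bufsize, "%s%s", prefix, name);
}

/* make_import_name_sketch (buf, sizeof buf, "foo", 1) -> "*__imp__foo"
   (ia32 PE, underscored names); with 0 it yields "*__imp_foo"
   (x86-64 PE, no user-label prefix).  */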
14420 /* Expand SYMBOL into its corresponding far-address symbol.
14421 WANT_REG is true if we require the result be a register. */
14423 static rtx
14424 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14426 tree imp_decl;
14427 rtx x;
14429 gcc_assert (SYMBOL_REF_DECL (symbol));
14430 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14432 x = DECL_RTL (imp_decl);
14433 if (want_reg)
14434 x = force_reg (Pmode, x);
14435 return x;
14438 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14439 true if we require the result be a register. */
14441 static rtx
14442 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14444 tree imp_decl;
14445 rtx x;
14447 gcc_assert (SYMBOL_REF_DECL (symbol));
14448 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14450 x = DECL_RTL (imp_decl);
14451 if (want_reg)
14452 x = force_reg (Pmode, x);
14453 return x;
14456 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
14457 is true if we require the result be a register. */
14459 static rtx
14460 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14462 if (!TARGET_PECOFF)
14463 return NULL_RTX;
14465 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14467 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14468 return legitimize_dllimport_symbol (addr, inreg);
14469 if (GET_CODE (addr) == CONST
14470 && GET_CODE (XEXP (addr, 0)) == PLUS
14471 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14472 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14474 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14475 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14479 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14480 return NULL_RTX;
14481 if (GET_CODE (addr) == SYMBOL_REF
14482 && !is_imported_p (addr)
14483 && SYMBOL_REF_EXTERNAL_P (addr)
14484 && SYMBOL_REF_DECL (addr))
14485 return legitimize_pe_coff_extern_decl (addr, inreg);
14487 if (GET_CODE (addr) == CONST
14488 && GET_CODE (XEXP (addr, 0)) == PLUS
14489 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14490 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14491 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14492 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14494 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14495 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14497 return NULL_RTX;
14500 /* Try machine-dependent ways of modifying an illegitimate address
14501 to be legitimate. If we find one, return the new, valid address.
14502 This macro is used in only one place: `memory_address' in explow.c.
14504 OLDX is the address as it was before break_out_memory_refs was called.
14505 In some cases it is useful to look at this to decide what needs to be done.
14507 It is always safe for this macro to do nothing. It exists to recognize
14508 opportunities to optimize the output.
14510 For the 80386, we handle X+REG by loading X into a register R and
14511 using R+REG. R will go in a general reg and indexing will be used.
14512 However, if REG is a broken-out memory address or multiplication,
14513 nothing needs to be done because REG can certainly go in a general reg.
14515 When -fpic is used, special handling is needed for symbolic references.
14516 See comments by legitimize_pic_address in i386.c for details. */
14518 static rtx
14519 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14521 bool changed = false;
14522 unsigned log;
14524 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14525 if (log)
14526 return legitimize_tls_address (x, (enum tls_model) log, false);
14527 if (GET_CODE (x) == CONST
14528 && GET_CODE (XEXP (x, 0)) == PLUS
14529 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14530 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14532 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14533 (enum tls_model) log, false);
14534 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14537 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14539 rtx tmp = legitimize_pe_coff_symbol (x, true);
14540 if (tmp)
14541 return tmp;
14544 if (flag_pic && SYMBOLIC_CONST (x))
14545 return legitimize_pic_address (x, 0);
14547 #if TARGET_MACHO
14548 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14549 return machopic_indirect_data_reference (x, 0);
14550 #endif
14552 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
14553 if (GET_CODE (x) == ASHIFT
14554 && CONST_INT_P (XEXP (x, 1))
14555 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14557 changed = true;
14558 log = INTVAL (XEXP (x, 1));
14559 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14560 GEN_INT (1 << log));
14563 if (GET_CODE (x) == PLUS)
14565 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14567 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14568 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14569 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14571 changed = true;
14572 log = INTVAL (XEXP (XEXP (x, 0), 1));
14573 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14574 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14575 GEN_INT (1 << log));
14578 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14579 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14580 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14582 changed = true;
14583 log = INTVAL (XEXP (XEXP (x, 1), 1));
14584 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14585 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14586 GEN_INT (1 << log));
14589 /* Put multiply first if it isn't already. */
14590 if (GET_CODE (XEXP (x, 1)) == MULT)
14592 std::swap (XEXP (x, 0), XEXP (x, 1));
14593 changed = true;
14596 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14597 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14598 created by virtual register instantiation, register elimination, and
14599 similar optimizations. */
14600 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14602 changed = true;
14603 x = gen_rtx_PLUS (Pmode,
14604 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14605 XEXP (XEXP (x, 1), 0)),
14606 XEXP (XEXP (x, 1), 1));
14609 /* Canonicalize
14610 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14611 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14612 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14613 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14614 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14615 && CONSTANT_P (XEXP (x, 1)))
14617 rtx constant;
14618 rtx other = NULL_RTX;
14620 if (CONST_INT_P (XEXP (x, 1)))
14622 constant = XEXP (x, 1);
14623 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14625 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14627 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14628 other = XEXP (x, 1);
14630 else
14631 constant = 0;
14633 if (constant)
14635 changed = true;
14636 x = gen_rtx_PLUS (Pmode,
14637 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14638 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14639 plus_constant (Pmode, other,
14640 INTVAL (constant)));
14644 if (changed && ix86_legitimate_address_p (mode, x, false))
14645 return x;
14647 if (GET_CODE (XEXP (x, 0)) == MULT)
14649 changed = true;
14650 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14653 if (GET_CODE (XEXP (x, 1)) == MULT)
14655 changed = true;
14656 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14659 if (changed
14660 && REG_P (XEXP (x, 1))
14661 && REG_P (XEXP (x, 0)))
14662 return x;
14664 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14666 changed = true;
14667 x = legitimize_pic_address (x, 0);
14670 if (changed && ix86_legitimate_address_p (mode, x, false))
14671 return x;
14673 if (REG_P (XEXP (x, 0)))
14675 rtx temp = gen_reg_rtx (Pmode);
14676 rtx val = force_operand (XEXP (x, 1), temp);
14677 if (val != temp)
14679 val = convert_to_mode (Pmode, val, 1);
14680 emit_move_insn (temp, val);
14683 XEXP (x, 1) = temp;
14684 return x;
14687 else if (REG_P (XEXP (x, 1)))
14689 rtx temp = gen_reg_rtx (Pmode);
14690 rtx val = force_operand (XEXP (x, 0), temp);
14691 if (val != temp)
14693 val = convert_to_mode (Pmode, val, 1);
14694 emit_move_insn (temp, val);
14697 XEXP (x, 0) = temp;
14698 return x;
14702 return x;
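/* Editorial sketch, not GCC code: the canonicalizations above aim at the
   hardware addressing form base + index * scale + disp with scale in
   {1, 2, 4, 8}; a shift by 0..3 is therefore rewritten as the equivalent
   multiply so the operand can become, say, (%ebx,%ecx,4).  */
static unsigned long
x86_effective_address_sketch (unsigned long base, unsigned long index,
                              unsigned int log2_scale, long disp)
{
  /* log2_scale is 0..3, exactly the shift counts canonicalized above.  */
  return base + (index << log2_scale) + disp;
}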
14705 /* Print an integer constant expression in assembler syntax. Addition
14706 and subtraction are the only arithmetic that may appear in these
14707 expressions. FILE is the stdio stream to write to, X is the rtx, and
14708 CODE is the operand print code from the output string. */
14710 static void
14711 output_pic_addr_const (FILE *file, rtx x, int code)
14713 char buf[256];
14715 switch (GET_CODE (x))
14717 case PC:
14718 gcc_assert (flag_pic);
14719 putc ('.', file);
14720 break;
14722 case SYMBOL_REF:
14723 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14724 output_addr_const (file, x);
14725 else
14727 const char *name = XSTR (x, 0);
14729 /* Mark the decl as referenced so that cgraph will
14730 output the function. */
14731 if (SYMBOL_REF_DECL (x))
14732 mark_decl_referenced (SYMBOL_REF_DECL (x));
14734 #if TARGET_MACHO
14735 if (MACHOPIC_INDIRECT
14736 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14737 name = machopic_indirection_name (x, /*stub_p=*/true);
14738 #endif
14739 assemble_name (file, name);
14741 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14742 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14743 fputs ("@PLT", file);
14744 break;
14746 case LABEL_REF:
14747 x = XEXP (x, 0);
14748 /* FALLTHRU */
14749 case CODE_LABEL:
14750 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14751 assemble_name (asm_out_file, buf);
14752 break;
14754 case CONST_INT:
14755 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14756 break;
14758 case CONST:
14759 /* This used to output parentheses around the expression,
14760 but that does not work on the 386 (either ATT or BSD assembler). */
14761 output_pic_addr_const (file, XEXP (x, 0), code);
14762 break;
14764 case CONST_DOUBLE:
14765 /* We can't handle floating point constants;
14766 TARGET_PRINT_OPERAND must handle them. */
14767 output_operand_lossage ("floating constant misused");
14768 break;
14770 case PLUS:
14771 /* Some assemblers need integer constants to appear first. */
14772 if (CONST_INT_P (XEXP (x, 0)))
14774 output_pic_addr_const (file, XEXP (x, 0), code);
14775 putc ('+', file);
14776 output_pic_addr_const (file, XEXP (x, 1), code);
14778 else
14780 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14781 output_pic_addr_const (file, XEXP (x, 1), code);
14782 putc ('+', file);
14783 output_pic_addr_const (file, XEXP (x, 0), code);
14785 break;
14787 case MINUS:
14788 if (!TARGET_MACHO)
14789 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14790 output_pic_addr_const (file, XEXP (x, 0), code);
14791 putc ('-', file);
14792 output_pic_addr_const (file, XEXP (x, 1), code);
14793 if (!TARGET_MACHO)
14794 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14795 break;
14797 case UNSPEC:
14798 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14800 bool f = i386_asm_output_addr_const_extra (file, x);
14801 gcc_assert (f);
14802 break;
14805 gcc_assert (XVECLEN (x, 0) == 1);
14806 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14807 switch (XINT (x, 1))
14809 case UNSPEC_GOT:
14810 fputs ("@GOT", file);
14811 break;
14812 case UNSPEC_GOTOFF:
14813 fputs ("@GOTOFF", file);
14814 break;
14815 case UNSPEC_PLTOFF:
14816 fputs ("@PLTOFF", file);
14817 break;
14818 case UNSPEC_PCREL:
14819 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14820 "(%rip)" : "[rip]", file);
14821 break;
14822 case UNSPEC_GOTPCREL:
14823 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14824 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14825 break;
14826 case UNSPEC_GOTTPOFF:
14827 /* FIXME: This might be @TPOFF in Sun ld too. */
14828 fputs ("@gottpoff", file);
14829 break;
14830 case UNSPEC_TPOFF:
14831 fputs ("@tpoff", file);
14832 break;
14833 case UNSPEC_NTPOFF:
14834 if (TARGET_64BIT)
14835 fputs ("@tpoff", file);
14836 else
14837 fputs ("@ntpoff", file);
14838 break;
14839 case UNSPEC_DTPOFF:
14840 fputs ("@dtpoff", file);
14841 break;
14842 case UNSPEC_GOTNTPOFF:
14843 if (TARGET_64BIT)
14844 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14845 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14846 else
14847 fputs ("@gotntpoff", file);
14848 break;
14849 case UNSPEC_INDNTPOFF:
14850 fputs ("@indntpoff", file);
14851 break;
14852 #if TARGET_MACHO
14853 case UNSPEC_MACHOPIC_OFFSET:
14854 putc ('-', file);
14855 machopic_output_function_base_name (file);
14856 break;
14857 #endif
14858 default:
14859 output_operand_lossage ("invalid UNSPEC as operand");
14860 break;
14862 break;
14864 default:
14865 output_operand_lossage ("invalid expression as operand");
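/* Editorial sketch, not GCC code: with -m32 -fpic, source accesses such
   as these are what make this printer emit @GOT, @GOTOFF and @PLT
   operands (e.g. "pic_extern_var@GOT(%ebx)", "pic_static_var@GOTOFF(%ebx)"
   and "call pic_extern_fn@PLT").  */
extern int pic_extern_var;
static int pic_static_var;
extern int pic_extern_fn (int);

static int
pic_reference_sketch (void)
{
  return pic_extern_fn (pic_extern_var + pic_static_var);
}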
14869 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14870 We need to emit DTP-relative relocations. */
14872 static void ATTRIBUTE_UNUSED
14873 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14875 fputs (ASM_LONG, file);
14876 output_addr_const (file, x);
14877 fputs ("@dtpoff", file);
14878 switch (size)
14880 case 4:
14881 break;
14882 case 8:
14883 fputs (", 0", file);
14884 break;
14885 default:
14886 gcc_unreachable ();
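/* Editorial sketch of the directive emitted above, assuming ASM_LONG is
   "\t.long\t" on this target: size 4 yields "\t.long\tsym@dtpoff" and
   size 8 additionally appends ", 0" for the (zero) upper half.  */
#include <stdio.h>

static void
print_dtprel_sketch (FILE *f, const char *sym, int size)
{
  fprintf (f, "\t.long\t%s@dtpoff", sym);
  if (size == 8)
    fputs (", 0", f);
  fputc ('\n', f);
}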
14890 /* Return true if X is a representation of the PIC register. This copes
14891 with calls from ix86_find_base_term, where the register might have
14892 been replaced by a cselib value. */
14894 static bool
14895 ix86_pic_register_p (rtx x)
14897 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14898 return (pic_offset_table_rtx
14899 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14900 else if (!REG_P (x))
14901 return false;
14902 else if (pic_offset_table_rtx)
14904 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14905 return true;
14906 if (HARD_REGISTER_P (x)
14907 && !HARD_REGISTER_P (pic_offset_table_rtx)
14908 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14909 return true;
14910 return false;
14912 else
14913 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14916 /* Helper function for ix86_delegitimize_address.
14917 Attempt to delegitimize TLS local-exec accesses. */
14919 static rtx
14920 ix86_delegitimize_tls_address (rtx orig_x)
14922 rtx x = orig_x, unspec;
14923 struct ix86_address addr;
14925 if (!TARGET_TLS_DIRECT_SEG_REFS)
14926 return orig_x;
14927 if (MEM_P (x))
14928 x = XEXP (x, 0);
14929 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14930 return orig_x;
14931 if (ix86_decompose_address (x, &addr) == 0
14932 || addr.seg != DEFAULT_TLS_SEG_REG
14933 || addr.disp == NULL_RTX
14934 || GET_CODE (addr.disp) != CONST)
14935 return orig_x;
14936 unspec = XEXP (addr.disp, 0);
14937 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14938 unspec = XEXP (unspec, 0);
14939 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14940 return orig_x;
14941 x = XVECEXP (unspec, 0, 0);
14942 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14943 if (unspec != XEXP (addr.disp, 0))
14944 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14945 if (addr.index)
14947 rtx idx = addr.index;
14948 if (addr.scale != 1)
14949 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14950 x = gen_rtx_PLUS (Pmode, idx, x);
14952 if (addr.base)
14953 x = gen_rtx_PLUS (Pmode, addr.base, x);
14954 if (MEM_P (orig_x))
14955 x = replace_equiv_address_nv (orig_x, x);
14956 return x;
14959 /* In the name of slightly smaller debug output, and to cater to
14960 general assembler lossage, recognize PIC+GOTOFF and turn it back
14961 into a direct symbol reference.
14963 On Darwin, this is necessary to avoid a crash, because Darwin
14964 has a different PIC label for each routine but the DWARF debugging
14965 information is not associated with any particular routine, so it's
14966 necessary to remove references to the PIC label from RTL stored by
14967 the DWARF output code. */
14969 static rtx
14970 ix86_delegitimize_address (rtx x)
14972 rtx orig_x = delegitimize_mem_from_attrs (x);
14973 /* addend is NULL or some rtx if x is something+GOTOFF where
14974 something doesn't include the PIC register. */
14975 rtx addend = NULL_RTX;
14976 /* reg_addend is NULL or a multiple of some register. */
14977 rtx reg_addend = NULL_RTX;
14978 /* const_addend is NULL or a const_int. */
14979 rtx const_addend = NULL_RTX;
14980 /* This is the result, or NULL. */
14981 rtx result = NULL_RTX;
14983 x = orig_x;
14985 if (MEM_P (x))
14986 x = XEXP (x, 0);
14988 if (TARGET_64BIT)
14990 if (GET_CODE (x) == CONST
14991 && GET_CODE (XEXP (x, 0)) == PLUS
14992 && GET_MODE (XEXP (x, 0)) == Pmode
14993 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14994 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14995 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14997 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14998 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14999 if (MEM_P (orig_x))
15000 x = replace_equiv_address_nv (orig_x, x);
15001 return x;
15004 if (GET_CODE (x) == CONST
15005 && GET_CODE (XEXP (x, 0)) == UNSPEC
15006 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
15007 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
15008 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
15010 x = XVECEXP (XEXP (x, 0), 0, 0);
15011 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
15013 x = simplify_gen_subreg (GET_MODE (orig_x), x,
15014 GET_MODE (x), 0);
15015 if (x == NULL_RTX)
15016 return orig_x;
15018 return x;
15021 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
15022 return ix86_delegitimize_tls_address (orig_x);
15024 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
15025 and -mcmodel=medium -fpic. */
15028 if (GET_CODE (x) != PLUS
15029 || GET_CODE (XEXP (x, 1)) != CONST)
15030 return ix86_delegitimize_tls_address (orig_x);
15032 if (ix86_pic_register_p (XEXP (x, 0)))
15033 /* %ebx + GOT/GOTOFF */
15035 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15037 /* %ebx + %reg * scale + GOT/GOTOFF */
15038 reg_addend = XEXP (x, 0);
15039 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
15040 reg_addend = XEXP (reg_addend, 1);
15041 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
15042 reg_addend = XEXP (reg_addend, 0);
15043 else
15045 reg_addend = NULL_RTX;
15046 addend = XEXP (x, 0);
15049 else
15050 addend = XEXP (x, 0);
15052 x = XEXP (XEXP (x, 1), 0);
15053 if (GET_CODE (x) == PLUS
15054 && CONST_INT_P (XEXP (x, 1)))
15056 const_addend = XEXP (x, 1);
15057 x = XEXP (x, 0);
15060 if (GET_CODE (x) == UNSPEC
15061 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
15062 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
15063 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
15064 && !MEM_P (orig_x) && !addend)))
15065 result = XVECEXP (x, 0, 0);
15067 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
15068 && !MEM_P (orig_x))
15069 result = XVECEXP (x, 0, 0);
15071 if (! result)
15072 return ix86_delegitimize_tls_address (orig_x);
15074 if (const_addend)
15075 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
15076 if (reg_addend)
15077 result = gen_rtx_PLUS (Pmode, reg_addend, result);
15078 if (addend)
15080 /* If the rest of original X doesn't involve the PIC register, add
15081 addend and subtract pic_offset_table_rtx. This can happen e.g.
15082 for code like:
15083 leal (%ebx, %ecx, 4), %ecx
15085 movl foo@GOTOFF(%ecx), %edx
15086 in which case we return (%ecx - %ebx) + foo
15087 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
15088 and reload has completed. */
15089 if (pic_offset_table_rtx
15090 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
15091 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
15092 pic_offset_table_rtx),
15093 result);
15094 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
15096 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
15097 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
15098 result = gen_rtx_PLUS (Pmode, tmp, result);
15100 else
15101 return orig_x;
15103 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
15105 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
15106 if (result == NULL_RTX)
15107 return orig_x;
15109 return result;
15112 /* If X is a machine specific address (i.e. a symbol or label being
15113 referenced as a displacement from the GOT implemented using an
15114 UNSPEC), then return the base term. Otherwise return X. */
15117 ix86_find_base_term (rtx x)
15119 rtx term;
15121 if (TARGET_64BIT)
15123 if (GET_CODE (x) != CONST)
15124 return x;
15125 term = XEXP (x, 0);
15126 if (GET_CODE (term) == PLUS
15127 && CONST_INT_P (XEXP (term, 1)))
15128 term = XEXP (term, 0);
15129 if (GET_CODE (term) != UNSPEC
15130 || (XINT (term, 1) != UNSPEC_GOTPCREL
15131 && XINT (term, 1) != UNSPEC_PCREL))
15132 return x;
15134 return XVECEXP (term, 0, 0);
15137 return ix86_delegitimize_address (x);
15140 static void
15141 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15142 bool fp, FILE *file)
15144 const char *suffix;
15146 if (mode == CCFPmode || mode == CCFPUmode)
15148 code = ix86_fp_compare_code_to_integer (code);
15149 mode = CCmode;
15151 if (reverse)
15152 code = reverse_condition (code);
15154 switch (code)
15156 case EQ:
15157 switch (mode)
15159 case CCAmode:
15160 suffix = "a";
15161 break;
15162 case CCCmode:
15163 suffix = "c";
15164 break;
15165 case CCOmode:
15166 suffix = "o";
15167 break;
15168 case CCPmode:
15169 suffix = "p";
15170 break;
15171 case CCSmode:
15172 suffix = "s";
15173 break;
15174 default:
15175 suffix = "e";
15176 break;
15178 break;
15179 case NE:
15180 switch (mode)
15182 case CCAmode:
15183 suffix = "na";
15184 break;
15185 case CCCmode:
15186 suffix = "nc";
15187 break;
15188 case CCOmode:
15189 suffix = "no";
15190 break;
15191 case CCPmode:
15192 suffix = "np";
15193 break;
15194 case CCSmode:
15195 suffix = "ns";
15196 break;
15197 default:
15198 suffix = "ne";
15199 break;
15201 break;
15202 case GT:
15203 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15204 suffix = "g";
15205 break;
15206 case GTU:
15207 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15208 Those same assemblers have the same but opposite lossage on cmov. */
15209 if (mode == CCmode)
15210 suffix = fp ? "nbe" : "a";
15211 else
15212 gcc_unreachable ();
15213 break;
15214 case LT:
15215 switch (mode)
15217 case CCNOmode:
15218 case CCGOCmode:
15219 suffix = "s";
15220 break;
15222 case CCmode:
15223 case CCGCmode:
15224 suffix = "l";
15225 break;
15227 default:
15228 gcc_unreachable ();
15230 break;
15231 case LTU:
15232 if (mode == CCmode)
15233 suffix = "b";
15234 else if (mode == CCCmode)
15235 suffix = fp ? "b" : "c";
15236 else
15237 gcc_unreachable ();
15238 break;
15239 case GE:
15240 switch (mode)
15242 case CCNOmode:
15243 case CCGOCmode:
15244 suffix = "ns";
15245 break;
15247 case CCmode:
15248 case CCGCmode:
15249 suffix = "ge";
15250 break;
15252 default:
15253 gcc_unreachable ();
15255 break;
15256 case GEU:
15257 if (mode == CCmode)
15258 suffix = "nb";
15259 else if (mode == CCCmode)
15260 suffix = fp ? "nb" : "nc";
15261 else
15262 gcc_unreachable ();
15263 break;
15264 case LE:
15265 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15266 suffix = "le";
15267 break;
15268 case LEU:
15269 if (mode == CCmode)
15270 suffix = "be";
15271 else
15272 gcc_unreachable ();
15273 break;
15274 case UNORDERED:
15275 suffix = fp ? "u" : "p";
15276 break;
15277 case ORDERED:
15278 suffix = fp ? "nu" : "np";
15279 break;
15280 default:
15281 gcc_unreachable ();
15283 fputs (suffix, file);
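/* Editorial sketch, not GCC code: for the common CCmode case the
   selection above pairs signed and unsigned integer comparisons with
   these setcc/jcc/cmov suffixes.  */
struct cc_suffix_sketch
{
  const char *rtx_codes;
  const char *signed_suffix;
  const char *unsigned_suffix;
};

static const struct cc_suffix_sketch cc_suffixes_sketch[] =
{
  { "EQ / EQ",  "e",  "e"  },
  { "NE / NE",  "ne", "ne" },
  { "LT / LTU", "l",  "b"  },
  { "LE / LEU", "le", "be" },
  { "GT / GTU", "g",  "a"  },
  { "GE / GEU", "ge", "nb" },
};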
15286 /* Print the name of register X to FILE based on its machine mode and number.
15287 If CODE is 'w', pretend the mode is HImode.
15288 If CODE is 'b', pretend the mode is QImode.
15289 If CODE is 'k', pretend the mode is SImode.
15290 If CODE is 'q', pretend the mode is DImode.
15291 If CODE is 'x', pretend the mode is V4SFmode.
15292 If CODE is 't', pretend the mode is V8SFmode.
15293 If CODE is 'g', pretend the mode is V16SFmode.
15294 If CODE is 'h', pretend the reg is the 'high' byte register.
15295 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
15296 If CODE is 'd', duplicate the operand for an AVX instruction.
15299 void
15300 print_reg (rtx x, int code, FILE *file)
15302 const char *reg;
15303 int msize;
15304 unsigned int regno;
15305 bool duplicated;
15307 if (ASSEMBLER_DIALECT == ASM_ATT)
15308 putc ('%', file);
15310 if (x == pc_rtx)
15312 gcc_assert (TARGET_64BIT);
15313 fputs ("rip", file);
15314 return;
15317 if (code == 'y' && STACK_TOP_P (x))
15319 fputs ("st(0)", file);
15320 return;
15323 if (code == 'w')
15324 msize = 2;
15325 else if (code == 'b')
15326 msize = 1;
15327 else if (code == 'k')
15328 msize = 4;
15329 else if (code == 'q')
15330 msize = 8;
15331 else if (code == 'h')
15332 msize = 0;
15333 else if (code == 'x')
15334 msize = 16;
15335 else if (code == 't')
15336 msize = 32;
15337 else if (code == 'g')
15338 msize = 64;
15339 else
15340 msize = GET_MODE_SIZE (GET_MODE (x));
15342 regno = true_regnum (x);
15344 gcc_assert (regno != ARG_POINTER_REGNUM
15345 && regno != FRAME_POINTER_REGNUM
15346 && regno != FLAGS_REG
15347 && regno != FPSR_REG
15348 && regno != FPCR_REG);
15350 duplicated = code == 'd' && TARGET_AVX;
15352 switch (msize)
15354 case 8:
15355 case 4:
15356 if (LEGACY_INT_REGNO_P (regno))
15357 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
15358 case 16:
15359 case 12:
15360 case 2:
15361 normal:
15362 reg = hi_reg_name[regno];
15363 break;
15364 case 1:
15365 if (regno >= ARRAY_SIZE (qi_reg_name))
15366 goto normal;
15367 reg = qi_reg_name[regno];
15368 break;
15369 case 0:
15370 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15371 goto normal;
15372 reg = qi_high_reg_name[regno];
15373 break;
15374 case 32:
15375 case 64:
15376 if (SSE_REGNO_P (regno))
15378 gcc_assert (!duplicated);
15379 putc (msize == 32 ? 'y' : 'z', file);
15380 reg = hi_reg_name[regno] + 1;
15381 break;
15383 goto normal;
15384 default:
15385 gcc_unreachable ();
15388 fputs (reg, file);
15390 /* Irritatingly, AMD extended registers use a
15391 different naming convention: "r%d[bwd]". */
15392 if (REX_INT_REGNO_P (regno))
15394 gcc_assert (TARGET_64BIT);
15395 switch (msize)
15397 case 0:
15398 error ("extended registers have no high halves");
15399 break;
15400 case 1:
15401 putc ('b', file);
15402 break;
15403 case 2:
15404 putc ('w', file);
15405 break;
15406 case 4:
15407 putc ('d', file);
15408 break;
15409 case 8:
15410 /* no suffix */
15411 break;
15412 default:
15413 error ("unsupported operand size for extended register");
15414 break;
15416 return;
15419 if (duplicated)
15421 if (ASSEMBLER_DIALECT == ASM_ATT)
15422 fprintf (file, ", %%%s", reg);
15423 else
15424 fprintf (file, ", %s", reg);
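/* Editorial sketch, not GCC code: the size-override codes map one hard
   register to its differently sized names, e.g. for the "a" register
   'b' -> %al, 'w' -> %ax, 'k' -> %eax and 'q' -> %rax.  The same
   modifiers can also be used in extended asm; "%b1" below prints the
   QImode name of the register holding operand 1.  */
static unsigned char
low_byte_sketch (unsigned int x)
{
  unsigned char b;
  __asm__ ("mov %b1, %0" : "=q" (b) : "q" (x));
  return b;
}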
15428 /* Meaning of CODE:
15429 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15430 C -- print opcode suffix for set/cmov insn.
15431 c -- like C, but print reversed condition
15432 F,f -- likewise, but for floating-point.
15433 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15434 otherwise nothing
15435 R -- print embedded rounding and sae.
15436 r -- print only sae.
15437 z -- print the opcode suffix for the size of the current operand.
15438 Z -- likewise, with special suffixes for x87 instructions.
15439 * -- print a star (in certain assembler syntax)
15440 A -- print an absolute memory reference.
15441 E -- print address with DImode register names if TARGET_64BIT.
15442 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15443 s -- print a shift double count, followed by the assembler's argument
15444 delimiter.
15445 b -- print the QImode name of the register for the indicated operand.
15446 %b0 would print %al if operands[0] is reg 0.
15447 w -- likewise, print the HImode name of the register.
15448 k -- likewise, print the SImode name of the register.
15449 q -- likewise, print the DImode name of the register.
15450 x -- likewise, print the V4SFmode name of the register.
15451 t -- likewise, print the V8SFmode name of the register.
15452 g -- likewise, print the V16SFmode name of the register.
15453 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15454 y -- print "st(0)" instead of "st" as a register.
15455 d -- print duplicated register operand for AVX instruction.
15456 D -- print condition for SSE cmp instruction.
15457 P -- if PIC, print an @PLT suffix.
15458 p -- print raw symbol name.
15459 X -- don't print any sort of PIC '@' suffix for a symbol.
15460 & -- print some in-use local-dynamic symbol name.
15461 H -- print a memory address offset by 8; used for sse high-parts
15462 Y -- print condition for XOP pcom* instruction.
15463 + -- print a branch hint as 'cs' or 'ds' prefix
15464 ; -- print a semicolon (after prefixes due to bug in older gas).
15465 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15466 @ -- print a segment register of thread base pointer load
15467 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15468 ! -- print MPX prefix for jxx/call/ret instructions if required.
15471 void
15472 ix86_print_operand (FILE *file, rtx x, int code)
15474 if (code)
15476 switch (code)
15478 case 'A':
15479 switch (ASSEMBLER_DIALECT)
15481 case ASM_ATT:
15482 putc ('*', file);
15483 break;
15485 case ASM_INTEL:
15486 /* Intel syntax. For absolute addresses, registers should not
15487 be surrounded by braces. */
15488 if (!REG_P (x))
15490 putc ('[', file);
15491 ix86_print_operand (file, x, 0);
15492 putc (']', file);
15493 return;
15495 break;
15497 default:
15498 gcc_unreachable ();
15501 ix86_print_operand (file, x, 0);
15502 return;
15504 case 'E':
15505 /* Wrap address in an UNSPEC to declare special handling. */
15506 if (TARGET_64BIT)
15507 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15509 output_address (x);
15510 return;
15512 case 'L':
15513 if (ASSEMBLER_DIALECT == ASM_ATT)
15514 putc ('l', file);
15515 return;
15517 case 'W':
15518 if (ASSEMBLER_DIALECT == ASM_ATT)
15519 putc ('w', file);
15520 return;
15522 case 'B':
15523 if (ASSEMBLER_DIALECT == ASM_ATT)
15524 putc ('b', file);
15525 return;
15527 case 'Q':
15528 if (ASSEMBLER_DIALECT == ASM_ATT)
15529 putc ('l', file);
15530 return;
15532 case 'S':
15533 if (ASSEMBLER_DIALECT == ASM_ATT)
15534 putc ('s', file);
15535 return;
15537 case 'T':
15538 if (ASSEMBLER_DIALECT == ASM_ATT)
15539 putc ('t', file);
15540 return;
15542 case 'O':
15543 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15544 if (ASSEMBLER_DIALECT != ASM_ATT)
15545 return;
15547 switch (GET_MODE_SIZE (GET_MODE (x)))
15549 case 2:
15550 putc ('w', file);
15551 break;
15553 case 4:
15554 putc ('l', file);
15555 break;
15557 case 8:
15558 putc ('q', file);
15559 break;
15561 default:
15562 output_operand_lossage
15563 ("invalid operand size for operand code 'O'");
15564 return;
15567 putc ('.', file);
15568 #endif
15569 return;
15571 case 'z':
15572 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15574 /* Opcodes don't get size suffixes if using Intel syntax. */
15575 if (ASSEMBLER_DIALECT == ASM_INTEL)
15576 return;
15578 switch (GET_MODE_SIZE (GET_MODE (x)))
15580 case 1:
15581 putc ('b', file);
15582 return;
15584 case 2:
15585 putc ('w', file);
15586 return;
15588 case 4:
15589 putc ('l', file);
15590 return;
15592 case 8:
15593 putc ('q', file);
15594 return;
15596 default:
15597 output_operand_lossage
15598 ("invalid operand size for operand code 'z'");
15599 return;
15603 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15604 warning
15605 (0, "non-integer operand used with operand code 'z'");
15606 /* FALLTHRU */
15608 case 'Z':
15609 /* 387 opcodes don't get size suffixes if using Intel syntax. */
15610 if (ASSEMBLER_DIALECT == ASM_INTEL)
15611 return;
15613 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15615 switch (GET_MODE_SIZE (GET_MODE (x)))
15617 case 2:
15618 #ifdef HAVE_AS_IX86_FILDS
15619 putc ('s', file);
15620 #endif
15621 return;
15623 case 4:
15624 putc ('l', file);
15625 return;
15627 case 8:
15628 #ifdef HAVE_AS_IX86_FILDQ
15629 putc ('q', file);
15630 #else
15631 fputs ("ll", file);
15632 #endif
15633 return;
15635 default:
15636 break;
15639 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15641 /* 387 opcodes don't get size suffixes
15642 if the operands are registers. */
15643 if (STACK_REG_P (x))
15644 return;
15646 switch (GET_MODE_SIZE (GET_MODE (x)))
15648 case 4:
15649 putc ('s', file);
15650 return;
15652 case 8:
15653 putc ('l', file);
15654 return;
15656 case 12:
15657 case 16:
15658 putc ('t', file);
15659 return;
15661 default:
15662 break;
15665 else
15667 output_operand_lossage
15668 ("invalid operand type used with operand code 'Z'");
15669 return;
15672 output_operand_lossage
15673 ("invalid operand size for operand code 'Z'");
15674 return;
15676 case 'd':
15677 case 'b':
15678 case 'w':
15679 case 'k':
15680 case 'q':
15681 case 'h':
15682 case 't':
15683 case 'g':
15684 case 'y':
15685 case 'x':
15686 case 'X':
15687 case 'P':
15688 case 'p':
15689 break;
15691 case 's':
15692 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15694 ix86_print_operand (file, x, 0);
15695 fputs (", ", file);
15697 return;
15699 case 'Y':
15700 switch (GET_CODE (x))
15702 case NE:
15703 fputs ("neq", file);
15704 break;
15705 case EQ:
15706 fputs ("eq", file);
15707 break;
15708 case GE:
15709 case GEU:
15710 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15711 break;
15712 case GT:
15713 case GTU:
15714 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15715 break;
15716 case LE:
15717 case LEU:
15718 fputs ("le", file);
15719 break;
15720 case LT:
15721 case LTU:
15722 fputs ("lt", file);
15723 break;
15724 case UNORDERED:
15725 fputs ("unord", file);
15726 break;
15727 case ORDERED:
15728 fputs ("ord", file);
15729 break;
15730 case UNEQ:
15731 fputs ("ueq", file);
15732 break;
15733 case UNGE:
15734 fputs ("nlt", file);
15735 break;
15736 case UNGT:
15737 fputs ("nle", file);
15738 break;
15739 case UNLE:
15740 fputs ("ule", file);
15741 break;
15742 case UNLT:
15743 fputs ("ult", file);
15744 break;
15745 case LTGT:
15746 fputs ("une", file);
15747 break;
15748 default:
15749 output_operand_lossage ("operand is not a condition code, "
15750 "invalid operand code 'Y'");
15751 return;
15753 return;
15755 case 'D':
15756 /* A little bit of brain damage here. The SSE compare instructions
15757 use completely different names for the comparisons than the
15758 fp conditional moves do. */
15759 switch (GET_CODE (x))
15761 case UNEQ:
15762 if (TARGET_AVX)
15764 fputs ("eq_us", file);
15765 break;
15767 case EQ:
15768 fputs ("eq", file);
15769 break;
15770 case UNLT:
15771 if (TARGET_AVX)
15773 fputs ("nge", file);
15774 break;
15776 case LT:
15777 fputs ("lt", file);
15778 break;
15779 case UNLE:
15780 if (TARGET_AVX)
15782 fputs ("ngt", file);
15783 break;
15785 case LE:
15786 fputs ("le", file);
15787 break;
15788 case UNORDERED:
15789 fputs ("unord", file);
15790 break;
15791 case LTGT:
15792 if (TARGET_AVX)
15794 fputs ("neq_oq", file);
15795 break;
15797 case NE:
15798 fputs ("neq", file);
15799 break;
15800 case GE:
15801 if (TARGET_AVX)
15803 fputs ("ge", file);
15804 break;
15806 case UNGE:
15807 fputs ("nlt", file);
15808 break;
15809 case GT:
15810 if (TARGET_AVX)
15812 fputs ("gt", file);
15813 break;
15815 case UNGT:
15816 fputs ("nle", file);
15817 break;
15818 case ORDERED:
15819 fputs ("ord", file);
15820 break;
15821 default:
15822 output_operand_lossage ("operand is not a condition code, "
15823 "invalid operand code 'D'");
15824 return;
15826 return;
15828 case 'F':
15829 case 'f':
15830 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15831 if (ASSEMBLER_DIALECT == ASM_ATT)
15832 putc ('.', file);
15833 #endif
15835 case 'C':
15836 case 'c':
15837 if (!COMPARISON_P (x))
15839 output_operand_lossage ("operand is not a condition code, "
15840 "invalid operand code '%c'", code);
15841 return;
15843 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15844 code == 'c' || code == 'f',
15845 code == 'F' || code == 'f',
15846 file);
15847 return;
15849 case 'H':
15850 if (!offsettable_memref_p (x))
15852 output_operand_lossage ("operand is not an offsettable memory "
15853 "reference, invalid operand code 'H'");
15854 return;
15856 /* It doesn't actually matter what mode we use here, as we're
15857 only going to use this for printing. */
15858 x = adjust_address_nv (x, DImode, 8);
15859 /* Output 'qword ptr' for intel assembler dialect. */
15860 if (ASSEMBLER_DIALECT == ASM_INTEL)
15861 code = 'q';
15862 break;
15864 case 'K':
15865 gcc_assert (CONST_INT_P (x));
15867 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15868 #ifdef HAVE_AS_IX86_HLE
15869 fputs ("xacquire ", file);
15870 #else
15871 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15872 #endif
15873 else if (INTVAL (x) & IX86_HLE_RELEASE)
15874 #ifdef HAVE_AS_IX86_HLE
15875 fputs ("xrelease ", file);
15876 #else
15877 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15878 #endif
15879 /* We do not want to print value of the operand. */
15880 return;
15882 case 'N':
15883 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15884 fputs ("{z}", file);
15885 return;
15887 case 'r':
15888 gcc_assert (CONST_INT_P (x));
15889 gcc_assert (INTVAL (x) == ROUND_SAE);
15891 if (ASSEMBLER_DIALECT == ASM_INTEL)
15892 fputs (", ", file);
15894 fputs ("{sae}", file);
15896 if (ASSEMBLER_DIALECT == ASM_ATT)
15897 fputs (", ", file);
15899 return;
15901 case 'R':
15902 gcc_assert (CONST_INT_P (x));
15904 if (ASSEMBLER_DIALECT == ASM_INTEL)
15905 fputs (", ", file);
15907 switch (INTVAL (x))
15909 case ROUND_NEAREST_INT | ROUND_SAE:
15910 fputs ("{rn-sae}", file);
15911 break;
15912 case ROUND_NEG_INF | ROUND_SAE:
15913 fputs ("{rd-sae}", file);
15914 break;
15915 case ROUND_POS_INF | ROUND_SAE:
15916 fputs ("{ru-sae}", file);
15917 break;
15918 case ROUND_ZERO | ROUND_SAE:
15919 fputs ("{rz-sae}", file);
15920 break;
15921 default:
15922 gcc_unreachable ();
15925 if (ASSEMBLER_DIALECT == ASM_ATT)
15926 fputs (", ", file);
15928 return;
15930 case '*':
15931 if (ASSEMBLER_DIALECT == ASM_ATT)
15932 putc ('*', file);
15933 return;
15935 case '&':
15937 const char *name = get_some_local_dynamic_name ();
15938 if (name == NULL)
15939 output_operand_lossage ("'%%&' used without any "
15940 "local dynamic TLS references");
15941 else
15942 assemble_name (file, name);
15943 return;
15946 case '+':
15948 rtx x;
15950 if (!optimize
15951 || optimize_function_for_size_p (cfun)
15952 || !TARGET_BRANCH_PREDICTION_HINTS)
15953 return;
15955 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15956 if (x)
15958 int pred_val = XINT (x, 0);
15960 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15961 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15963 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15964 bool cputaken
15965 = final_forward_branch_p (current_output_insn) == 0;
15967 /* Emit hints only in cases where the default branch prediction
15968 heuristics would fail. */
15969 if (taken != cputaken)
15971 /* We use 3e (DS) prefix for taken branches and
15972 2e (CS) prefix for not taken branches. */
15973 if (taken)
15974 fputs ("ds ; ", file);
15975 else
15976 fputs ("cs ; ", file);
15980 return;
15983 case ';':
15984 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15985 putc (';', file);
15986 #endif
15987 return;
15989 case '@':
15990 if (ASSEMBLER_DIALECT == ASM_ATT)
15991 putc ('%', file);
15993 /* The kernel uses a different segment register for performance
15994 reasons; a system call would not have to trash the userspace
15995 segment register, which would be expensive. */
15996 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15997 fputs ("fs", file);
15998 else
15999 fputs ("gs", file);
16000 return;
16002 case '~':
16003 putc (TARGET_AVX2 ? 'i' : 'f', file);
16004 return;
16006 case '^':
16007 if (TARGET_64BIT && Pmode != word_mode)
16008 fputs ("addr32 ", file);
16009 return;
16011 case '!':
16012 if (ix86_bnd_prefixed_insn_p (current_output_insn))
16013 fputs ("bnd ", file);
16014 return;
16016 default:
16017 output_operand_lossage ("invalid operand code '%c'", code);
16021 if (REG_P (x))
16022 print_reg (x, code, file);
16024 else if (MEM_P (x))
16026 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
16027 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
16028 && GET_MODE (x) != BLKmode)
16030 const char * size;
16031 switch (GET_MODE_SIZE (GET_MODE (x)))
16033 case 1: size = "BYTE"; break;
16034 case 2: size = "WORD"; break;
16035 case 4: size = "DWORD"; break;
16036 case 8: size = "QWORD"; break;
16037 case 12: size = "TBYTE"; break;
16038 case 16:
16039 if (GET_MODE (x) == XFmode)
16040 size = "TBYTE";
16041 else
16042 size = "XMMWORD";
16043 break;
16044 case 32: size = "YMMWORD"; break;
16045 case 64: size = "ZMMWORD"; break;
16046 default:
16047 gcc_unreachable ();
16050 /* Check for explicit size override (codes 'b', 'w', 'k',
16051 'q' and 'x') */
16052 if (code == 'b')
16053 size = "BYTE";
16054 else if (code == 'w')
16055 size = "WORD";
16056 else if (code == 'k')
16057 size = "DWORD";
16058 else if (code == 'q')
16059 size = "QWORD";
16060 else if (code == 'x')
16061 size = "XMMWORD";
16063 fputs (size, file);
16064 fputs (" PTR ", file);
16067 x = XEXP (x, 0);
16068 /* Avoid (%rip) for call operands. */
16069 if (CONSTANT_ADDRESS_P (x) && code == 'P'
16070 && !CONST_INT_P (x))
16071 output_addr_const (file, x);
16072 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
16073 output_operand_lossage ("invalid constraints for operand");
16074 else
16075 output_address (x);
16078 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
16080 REAL_VALUE_TYPE r;
16081 long l;
16083 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16084 REAL_VALUE_TO_TARGET_SINGLE (r, l);
16086 if (ASSEMBLER_DIALECT == ASM_ATT)
16087 putc ('$', file);
16088 /* Sign extend 32bit SFmode immediate to 8 bytes. */
16089 if (code == 'q')
16090 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
16091 (unsigned long long) (int) l);
16092 else
16093 fprintf (file, "0x%08x", (unsigned int) l);
16096 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
16098 REAL_VALUE_TYPE r;
16099 long l[2];
16101 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16102 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16104 if (ASSEMBLER_DIALECT == ASM_ATT)
16105 putc ('$', file);
16106 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16109 /* These float cases don't actually occur as immediate operands. */
16110 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
16112 char dstr[30];
16114 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16115 fputs (dstr, file);
16118 else
16120 /* We have patterns that allow zero sets of memory, for instance.
16121 In 64-bit mode, we should probably support all 8-byte vectors,
16122 since we can in fact encode that into an immediate. */
16123 if (GET_CODE (x) == CONST_VECTOR)
16125 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16126 x = const0_rtx;
16129 if (code != 'P' && code != 'p')
16131 if (CONST_INT_P (x))
16133 if (ASSEMBLER_DIALECT == ASM_ATT)
16134 putc ('$', file);
16136 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16137 || GET_CODE (x) == LABEL_REF)
16139 if (ASSEMBLER_DIALECT == ASM_ATT)
16140 putc ('$', file);
16141 else
16142 fputs ("OFFSET FLAT:", file);
16145 if (CONST_INT_P (x))
16146 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16147 else if (flag_pic || MACHOPIC_INDIRECT)
16148 output_pic_addr_const (file, x, code);
16149 else
16150 output_addr_const (file, x);
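/* Editorial sketch, not GCC code: the integer size-suffix selection
   performed by the 'z' code above reduces, for the AT&T dialect, to a
   mapping like this (the Intel dialect prints no suffix at all).  */
static char
att_size_suffix_sketch (unsigned int mode_size_in_bytes)
{
  switch (mode_size_in_bytes)
    {
    case 1: return 'b';
    case 2: return 'w';
    case 4: return 'l';
    case 8: return 'q';
    default: return '?';   /* ix86_print_operand diagnoses this case.  */
    }
}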
16154 static bool
16155 ix86_print_operand_punct_valid_p (unsigned char code)
16157 return (code == '@' || code == '*' || code == '+' || code == '&'
16158 || code == ';' || code == '~' || code == '^' || code == '!');
16161 /* Print a memory operand whose address is ADDR. */
16163 static void
16164 ix86_print_operand_address (FILE *file, rtx addr)
16166 struct ix86_address parts;
16167 rtx base, index, disp;
16168 int scale;
16169 int ok;
16170 bool vsib = false;
16171 int code = 0;
16173 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16175 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16176 gcc_assert (parts.index == NULL_RTX);
16177 parts.index = XVECEXP (addr, 0, 1);
16178 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16179 addr = XVECEXP (addr, 0, 0);
16180 vsib = true;
16182 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16184 gcc_assert (TARGET_64BIT);
16185 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16186 code = 'q';
16188 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16190 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16191 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16192 if (parts.base != NULL_RTX)
16194 parts.index = parts.base;
16195 parts.scale = 1;
16197 parts.base = XVECEXP (addr, 0, 0);
16198 addr = XVECEXP (addr, 0, 0);
16200 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16202 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16203 gcc_assert (parts.index == NULL_RTX);
16204 parts.index = XVECEXP (addr, 0, 1);
16205 addr = XVECEXP (addr, 0, 0);
16207 else
16208 ok = ix86_decompose_address (addr, &parts);
16210 gcc_assert (ok);
16212 base = parts.base;
16213 index = parts.index;
16214 disp = parts.disp;
16215 scale = parts.scale;
16217 switch (parts.seg)
16219 case SEG_DEFAULT:
16220 break;
16221 case SEG_FS:
16222 case SEG_GS:
16223 if (ASSEMBLER_DIALECT == ASM_ATT)
16224 putc ('%', file);
16225 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16226 break;
16227 default:
16228 gcc_unreachable ();
16231 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16232 if (TARGET_64BIT && !base && !index)
16234 rtx symbol = disp;
16236 if (GET_CODE (disp) == CONST
16237 && GET_CODE (XEXP (disp, 0)) == PLUS
16238 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16239 symbol = XEXP (XEXP (disp, 0), 0);
16241 if (GET_CODE (symbol) == LABEL_REF
16242 || (GET_CODE (symbol) == SYMBOL_REF
16243 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16244 base = pc_rtx;
16246 if (!base && !index)
16248 /* A displacement-only address requires special attention. */
16250 if (CONST_INT_P (disp))
16252 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16253 fputs ("ds:", file);
16254 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16256 else if (flag_pic)
16257 output_pic_addr_const (file, disp, 0);
16258 else
16259 output_addr_const (file, disp);
16261 else
16263 /* Print SImode register names to force addr32 prefix. */
16264 if (SImode_address_operand (addr, VOIDmode))
16266 #ifdef ENABLE_CHECKING
16267 gcc_assert (TARGET_64BIT);
16268 switch (GET_CODE (addr))
16270 case SUBREG:
16271 gcc_assert (GET_MODE (addr) == SImode);
16272 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16273 break;
16274 case ZERO_EXTEND:
16275 case AND:
16276 gcc_assert (GET_MODE (addr) == DImode);
16277 break;
16278 default:
16279 gcc_unreachable ();
16281 #endif
16282 gcc_assert (!code);
16283 code = 'k';
16285 else if (code == 0
16286 && TARGET_X32
16287 && disp
16288 && CONST_INT_P (disp)
16289 && INTVAL (disp) < -16*1024*1024)
16291 /* X32 runs in 64-bit mode, where displacement, DISP, in
16292 address DISP(%r64), is encoded as 32-bit immediate sign-
16293 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16294 address is %r64 + 0xffffffffbffffd00. When %r64 <
16295 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16296 which is invalid for x32. The correct address is %r64
16297 - 0x40000300 == 0xf7ffdd64. To properly encode
16298 -0x40000300(%r64) for x32, we zero-extend negative
16299 displacement by forcing addr32 prefix which truncates
16300 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16301 zero-extend all negative displacements, including -1(%rsp).
16302 However, for small negative displacements, sign-extension
16303 won't cause overflow. We only zero-extend negative
16304 displacements if they < -16*1024*1024, which is also used
16305 to check legitimate address displacements for PIC. */
16306 code = 'k';
16309 if (ASSEMBLER_DIALECT == ASM_ATT)
16311 if (disp)
16313 if (flag_pic)
16314 output_pic_addr_const (file, disp, 0);
16315 else if (GET_CODE (disp) == LABEL_REF)
16316 output_asm_label (disp);
16317 else
16318 output_addr_const (file, disp);
16321 putc ('(', file);
16322 if (base)
16323 print_reg (base, code, file);
16324 if (index)
16326 putc (',', file);
16327 print_reg (index, vsib ? 0 : code, file);
16328 if (scale != 1 || vsib)
16329 fprintf (file, ",%d", scale);
16331 putc (')', file);
16333 else
16335 rtx offset = NULL_RTX;
16337 if (disp)
16339 /* Pull out the offset of a symbol; print any symbol itself. */
16340 if (GET_CODE (disp) == CONST
16341 && GET_CODE (XEXP (disp, 0)) == PLUS
16342 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16344 offset = XEXP (XEXP (disp, 0), 1);
16345 disp = gen_rtx_CONST (VOIDmode,
16346 XEXP (XEXP (disp, 0), 0));
16349 if (flag_pic)
16350 output_pic_addr_const (file, disp, 0);
16351 else if (GET_CODE (disp) == LABEL_REF)
16352 output_asm_label (disp);
16353 else if (CONST_INT_P (disp))
16354 offset = disp;
16355 else
16356 output_addr_const (file, disp);
16359 putc ('[', file);
16360 if (base)
16362 print_reg (base, code, file);
16363 if (offset)
16365 if (INTVAL (offset) >= 0)
16366 putc ('+', file);
16367 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16370 else if (offset)
16371 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16372 else
16373 putc ('0', file);
16375 if (index)
16377 putc ('+', file);
16378 print_reg (index, vsib ? 0 : code, file);
16379 if (scale != 1 || vsib)
16380 fprintf (file, "*%d", scale);
16382 putc (']', file);
16387 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16389 static bool
16390 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16392 rtx op;
16394 if (GET_CODE (x) != UNSPEC)
16395 return false;
16397 op = XVECEXP (x, 0, 0);
16398 switch (XINT (x, 1))
16400 case UNSPEC_GOTTPOFF:
16401 output_addr_const (file, op);
16402 /* FIXME: This might be @TPOFF in Sun ld. */
16403 fputs ("@gottpoff", file);
16404 break;
16405 case UNSPEC_TPOFF:
16406 output_addr_const (file, op);
16407 fputs ("@tpoff", file);
16408 break;
16409 case UNSPEC_NTPOFF:
16410 output_addr_const (file, op);
16411 if (TARGET_64BIT)
16412 fputs ("@tpoff", file);
16413 else
16414 fputs ("@ntpoff", file);
16415 break;
16416 case UNSPEC_DTPOFF:
16417 output_addr_const (file, op);
16418 fputs ("@dtpoff", file);
16419 break;
16420 case UNSPEC_GOTNTPOFF:
16421 output_addr_const (file, op);
16422 if (TARGET_64BIT)
16423 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16424 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16425 else
16426 fputs ("@gotntpoff", file);
16427 break;
16428 case UNSPEC_INDNTPOFF:
16429 output_addr_const (file, op);
16430 fputs ("@indntpoff", file);
16431 break;
16432 #if TARGET_MACHO
16433 case UNSPEC_MACHOPIC_OFFSET:
16434 output_addr_const (file, op);
16435 putc ('-', file);
16436 machopic_output_function_base_name (file);
16437 break;
16438 #endif
16440 case UNSPEC_STACK_CHECK:
16442 int offset;
16444 gcc_assert (flag_split_stack);
16446 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16447 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16448 #else
16449 gcc_unreachable ();
16450 #endif
16452 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16454 break;
16456 default:
16457 return false;
16460 return true;
16463 /* Split one or more double-mode RTL references into pairs of half-mode
16464 references. The RTL can be REG, offsettable MEM, integer constant, or
16465 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16466 split and "num" is its length. lo_half and hi_half are output arrays
16467 that parallel "operands". */
16469 void
16470 split_double_mode (machine_mode mode, rtx operands[],
16471 int num, rtx lo_half[], rtx hi_half[])
16473 machine_mode half_mode;
16474 unsigned int byte;
16476 switch (mode)
16478 case TImode:
16479 half_mode = DImode;
16480 break;
16481 case DImode:
16482 half_mode = SImode;
16483 break;
16484 default:
16485 gcc_unreachable ();
16488 byte = GET_MODE_SIZE (half_mode);
16490 while (num--)
16492 rtx op = operands[num];
16494 /* simplify_subreg refuses to split volatile memory addresses,
16495 but we still have to handle them. */
16496 if (MEM_P (op))
16498 lo_half[num] = adjust_address (op, half_mode, 0);
16499 hi_half[num] = adjust_address (op, half_mode, byte);
16501 else
16503 lo_half[num] = simplify_gen_subreg (half_mode, op,
16504 GET_MODE (op) == VOIDmode
16505 ? mode : GET_MODE (op), 0);
16506 hi_half[num] = simplify_gen_subreg (half_mode, op,
16507 GET_MODE (op) == VOIDmode
16508 ? mode : GET_MODE (op), byte);
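/* Illustrative example (not from the original sources; assumes the usual
   little-endian subreg semantics on x86): the halves are selected purely
   by byte offset, so splitting the DImode constant 0x1122334455667788
   with this routine yields

     lo_half = (const_int 0x55667788)    subreg at byte 0
     hi_half = (const_int 0x11223344)    subreg at byte 4

   while an offsettable DImode MEM is split into the same MEM adjusted to
   SImode at offsets 0 and 4.  */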
16513 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16514 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16515 is the expression of the binary operation. The output may either be
16516 emitted here, or returned to the caller, like all output_* functions.
16518 There is no guarantee that the operands are the same mode, as they
16519 might be within FLOAT or FLOAT_EXTEND expressions. */
16521 #ifndef SYSV386_COMPAT
16522 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16523 wants to fix the assemblers because that causes incompatibility
16524 with gcc. No-one wants to fix gcc because that causes
16525 incompatibility with assemblers... You can use the option of
16526 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16527 #define SYSV386_COMPAT 1
16528 #endif
16530 const char *
16531 output_387_binary_op (rtx insn, rtx *operands)
16533 static char buf[40];
16534 const char *p;
16535 const char *ssep;
16536 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16538 #ifdef ENABLE_CHECKING
16539 /* Even if we do not want to check the inputs, this documents the input
16540 constraints, which helps in understanding the following code. */
16541 if (STACK_REG_P (operands[0])
16542 && ((REG_P (operands[1])
16543 && REGNO (operands[0]) == REGNO (operands[1])
16544 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16545 || (REG_P (operands[2])
16546 && REGNO (operands[0]) == REGNO (operands[2])
16547 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16548 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16549 ; /* ok */
16550 else
16551 gcc_assert (is_sse);
16552 #endif
16554 switch (GET_CODE (operands[3]))
16556 case PLUS:
16557 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16558 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16559 p = "fiadd";
16560 else
16561 p = "fadd";
16562 ssep = "vadd";
16563 break;
16565 case MINUS:
16566 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16567 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16568 p = "fisub";
16569 else
16570 p = "fsub";
16571 ssep = "vsub";
16572 break;
16574 case MULT:
16575 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16576 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16577 p = "fimul";
16578 else
16579 p = "fmul";
16580 ssep = "vmul";
16581 break;
16583 case DIV:
16584 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16585 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16586 p = "fidiv";
16587 else
16588 p = "fdiv";
16589 ssep = "vdiv";
16590 break;
16592 default:
16593 gcc_unreachable ();
16596 if (is_sse)
16598 if (TARGET_AVX)
16600 strcpy (buf, ssep);
16601 if (GET_MODE (operands[0]) == SFmode)
16602 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16603 else
16604 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16606 else
16608 strcpy (buf, ssep + 1);
16609 if (GET_MODE (operands[0]) == SFmode)
16610 strcat (buf, "ss\t{%2, %0|%0, %2}");
16611 else
16612 strcat (buf, "sd\t{%2, %0|%0, %2}");
16614 return buf;
16616 strcpy (buf, p);
16618 switch (GET_CODE (operands[3]))
16620 case MULT:
16621 case PLUS:
16622 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16623 std::swap (operands[1], operands[2]);
16625 /* We know operands[0] == operands[1]. */
16627 if (MEM_P (operands[2]))
16629 p = "%Z2\t%2";
16630 break;
16633 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16635 if (STACK_TOP_P (operands[0]))
16636 /* How is it that we are storing to a dead operand[2]?
16637 Well, presumably operands[1] is dead too. We can't
16638 store the result to st(0) as st(0) gets popped on this
16639 instruction. Instead store to operands[2] (which I
16640 think has to be st(1)). st(1) will be popped later.
16641 gcc <= 2.8.1 didn't have this check and generated
16642 assembly code that the Unixware assembler rejected. */
16643 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16644 else
16645 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16646 break;
16649 if (STACK_TOP_P (operands[0]))
16650 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16651 else
16652 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16653 break;
16655 case MINUS:
16656 case DIV:
16657 if (MEM_P (operands[1]))
16659 p = "r%Z1\t%1";
16660 break;
16663 if (MEM_P (operands[2]))
16665 p = "%Z2\t%2";
16666 break;
16669 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16671 #if SYSV386_COMPAT
16672 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16673 derived assemblers, confusingly reverse the direction of
16674 the operation for fsub{r} and fdiv{r} when the
16675 destination register is not st(0). The Intel assembler
16676 doesn't have this brain damage. Read !SYSV386_COMPAT to
16677 figure out what the hardware really does. */
16678 if (STACK_TOP_P (operands[0]))
16679 p = "{p\t%0, %2|rp\t%2, %0}";
16680 else
16681 p = "{rp\t%2, %0|p\t%0, %2}";
16682 #else
16683 if (STACK_TOP_P (operands[0]))
16684 /* As above for fmul/fadd, we can't store to st(0). */
16685 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16686 else
16687 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16688 #endif
16689 break;
16692 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16694 #if SYSV386_COMPAT
16695 if (STACK_TOP_P (operands[0]))
16696 p = "{rp\t%0, %1|p\t%1, %0}";
16697 else
16698 p = "{p\t%1, %0|rp\t%0, %1}";
16699 #else
16700 if (STACK_TOP_P (operands[0]))
16701 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16702 else
16703 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16704 #endif
16705 break;
16708 if (STACK_TOP_P (operands[0]))
16710 if (STACK_TOP_P (operands[1]))
16711 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16712 else
16713 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16714 break;
16716 else if (STACK_TOP_P (operands[1]))
16718 #if SYSV386_COMPAT
16719 p = "{\t%1, %0|r\t%0, %1}";
16720 #else
16721 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16722 #endif
16724 else
16726 #if SYSV386_COMPAT
16727 p = "{r\t%2, %0|\t%0, %2}";
16728 #else
16729 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16730 #endif
16732 break;
16734 default:
16735 gcc_unreachable ();
16738 strcat (buf, p);
16739 return buf;
16742 /* Check if a 256bit AVX register is referenced inside of EXP. */
16744 static bool
16745 ix86_check_avx256_register (const_rtx exp)
16747 if (SUBREG_P (exp))
16748 exp = SUBREG_REG (exp);
16750 return (REG_P (exp)
16751 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16754 /* Return needed mode for entity in optimize_mode_switching pass. */
16756 static int
16757 ix86_avx_u128_mode_needed (rtx_insn *insn)
16759 if (CALL_P (insn))
16761 rtx link;
16763 /* Needed mode is set to AVX_U128_CLEAN if there are
16764 no 256bit modes used in function arguments. */
16765 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16766 link;
16767 link = XEXP (link, 1))
16769 if (GET_CODE (XEXP (link, 0)) == USE)
16771 rtx arg = XEXP (XEXP (link, 0), 0);
16773 if (ix86_check_avx256_register (arg))
16774 return AVX_U128_DIRTY;
16778 return AVX_U128_CLEAN;
16781 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16782 changes state only when a 256bit register is written to, but we need
16783 to prevent the compiler from moving the optimal insertion point above
16784 an eventual read from a 256bit register. */
16785 subrtx_iterator::array_type array;
16786 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16787 if (ix86_check_avx256_register (*iter))
16788 return AVX_U128_DIRTY;
16790 return AVX_U128_ANY;
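/* Illustrative example (hedged sketch, not from the original sources):
   for a call insn whose CALL_INSN_FUNCTION_USAGE contains
   (use (reg:V8SF ymm0)), the loop above sees a 256bit register and
   returns AVX_U128_DIRTY, so mode switching knows the upper halves are
   live at that point; a call passing only scalar or 128bit arguments
   falls through and returns AVX_U128_CLEAN.  */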
16793 /* Return mode that i387 must be switched into
16794 prior to the execution of insn. */
16796 static int
16797 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16799 enum attr_i387_cw mode;
16801 /* The mode UNINITIALIZED is used to store the control word after a
16802 function call or ASM pattern. The mode ANY specifies that the function
16803 has no requirements on the control word and makes no changes to the
16804 bits we are interested in. */
16806 if (CALL_P (insn)
16807 || (NONJUMP_INSN_P (insn)
16808 && (asm_noperands (PATTERN (insn)) >= 0
16809 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16810 return I387_CW_UNINITIALIZED;
16812 if (recog_memoized (insn) < 0)
16813 return I387_CW_ANY;
16815 mode = get_attr_i387_cw (insn);
16817 switch (entity)
16819 case I387_TRUNC:
16820 if (mode == I387_CW_TRUNC)
16821 return mode;
16822 break;
16824 case I387_FLOOR:
16825 if (mode == I387_CW_FLOOR)
16826 return mode;
16827 break;
16829 case I387_CEIL:
16830 if (mode == I387_CW_CEIL)
16831 return mode;
16832 break;
16834 case I387_MASK_PM:
16835 if (mode == I387_CW_MASK_PM)
16836 return mode;
16837 break;
16839 default:
16840 gcc_unreachable ();
16843 return I387_CW_ANY;
16846 /* Return mode that entity must be switched into
16847 prior to the execution of insn. */
16849 static int
16850 ix86_mode_needed (int entity, rtx_insn *insn)
16852 switch (entity)
16854 case AVX_U128:
16855 return ix86_avx_u128_mode_needed (insn);
16856 case I387_TRUNC:
16857 case I387_FLOOR:
16858 case I387_CEIL:
16859 case I387_MASK_PM:
16860 return ix86_i387_mode_needed (entity, insn);
16861 default:
16862 gcc_unreachable ();
16864 return 0;
16867 /* Check if a 256bit AVX register is referenced in stores. */
16869 static void
16870 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16872 if (ix86_check_avx256_register (dest))
16874 bool *used = (bool *) data;
16875 *used = true;
16879 /* Calculate mode of upper 128bit AVX registers after the insn. */
16881 static int
16882 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16884 rtx pat = PATTERN (insn);
16886 if (vzeroupper_operation (pat, VOIDmode)
16887 || vzeroall_operation (pat, VOIDmode))
16888 return AVX_U128_CLEAN;
16890 /* We know that the state is clean after a CALL insn if there are no
16891 256bit registers used in the function return value. */
16892 if (CALL_P (insn))
16894 bool avx_reg256_found = false;
16895 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16897 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16900 /* Otherwise, return current mode. Remember that if insn
16901 references AVX 256bit registers, the mode was already changed
16902 to DIRTY from MODE_NEEDED. */
16903 return mode;
16906 /* Return the mode that an insn results in. */
16908 static int
16909 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16911 switch (entity)
16913 case AVX_U128:
16914 return ix86_avx_u128_mode_after (mode, insn);
16915 case I387_TRUNC:
16916 case I387_FLOOR:
16917 case I387_CEIL:
16918 case I387_MASK_PM:
16919 return mode;
16920 default:
16921 gcc_unreachable ();
16925 static int
16926 ix86_avx_u128_mode_entry (void)
16928 tree arg;
16930 /* Entry mode is set to AVX_U128_DIRTY if there are
16931 256bit modes used in function arguments. */
16932 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16933 arg = TREE_CHAIN (arg))
16935 rtx incoming = DECL_INCOMING_RTL (arg);
16937 if (incoming && ix86_check_avx256_register (incoming))
16938 return AVX_U128_DIRTY;
16941 return AVX_U128_CLEAN;
16944 /* Return a mode that ENTITY is assumed to be
16945 switched to at function entry. */
16947 static int
16948 ix86_mode_entry (int entity)
16950 switch (entity)
16952 case AVX_U128:
16953 return ix86_avx_u128_mode_entry ();
16954 case I387_TRUNC:
16955 case I387_FLOOR:
16956 case I387_CEIL:
16957 case I387_MASK_PM:
16958 return I387_CW_ANY;
16959 default:
16960 gcc_unreachable ();
16964 static int
16965 ix86_avx_u128_mode_exit (void)
16967 rtx reg = crtl->return_rtx;
16969 /* Exit mode is set to AVX_U128_DIRTY if there are
16970 256bit modes used in the function return register. */
16971 if (reg && ix86_check_avx256_register (reg))
16972 return AVX_U128_DIRTY;
16974 return AVX_U128_CLEAN;
16977 /* Return a mode that ENTITY is assumed to be
16978 switched to at function exit. */
16980 static int
16981 ix86_mode_exit (int entity)
16983 switch (entity)
16985 case AVX_U128:
16986 return ix86_avx_u128_mode_exit ();
16987 case I387_TRUNC:
16988 case I387_FLOOR:
16989 case I387_CEIL:
16990 case I387_MASK_PM:
16991 return I387_CW_ANY;
16992 default:
16993 gcc_unreachable ();
16997 static int
16998 ix86_mode_priority (int, int n)
17000 return n;
17003 /* Output code to initialize control word copies used by trunc?f?i and
17004 rounding patterns. CURRENT_MODE is set to current control word,
17005 while NEW_MODE is set to new control word. */
17007 static void
17008 emit_i387_cw_initialization (int mode)
17010 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
17011 rtx new_mode;
17013 enum ix86_stack_slot slot;
17015 rtx reg = gen_reg_rtx (HImode);
17017 emit_insn (gen_x86_fnstcw_1 (stored_mode));
17018 emit_move_insn (reg, copy_rtx (stored_mode));
17020 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
17021 || optimize_insn_for_size_p ())
17023 switch (mode)
17025 case I387_CW_TRUNC:
17026 /* round toward zero (truncate) */
17027 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
17028 slot = SLOT_CW_TRUNC;
17029 break;
17031 case I387_CW_FLOOR:
17032 /* round down toward -oo */
17033 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17034 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
17035 slot = SLOT_CW_FLOOR;
17036 break;
17038 case I387_CW_CEIL:
17039 /* round up toward +oo */
17040 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17041 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
17042 slot = SLOT_CW_CEIL;
17043 break;
17045 case I387_CW_MASK_PM:
17046 /* mask precision exception for nearbyint() */
17047 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17048 slot = SLOT_CW_MASK_PM;
17049 break;
17051 default:
17052 gcc_unreachable ();
17055 else
17057 switch (mode)
17059 case I387_CW_TRUNC:
17060 /* round toward zero (truncate) */
17061 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
17062 slot = SLOT_CW_TRUNC;
17063 break;
17065 case I387_CW_FLOOR:
17066 /* round down toward -oo */
17067 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
17068 slot = SLOT_CW_FLOOR;
17069 break;
17071 case I387_CW_CEIL:
17072 /* round up toward +oo */
17073 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
17074 slot = SLOT_CW_CEIL;
17075 break;
17077 case I387_CW_MASK_PM:
17078 /* mask precision exception for nearbyint() */
17079 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17080 slot = SLOT_CW_MASK_PM;
17081 break;
17083 default:
17084 gcc_unreachable ();
17088 gcc_assert (slot < MAX_386_STACK_LOCALS);
17090 new_mode = assign_386_stack_local (HImode, slot);
17091 emit_move_insn (new_mode, reg);
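/* Illustrative note (assuming the standard x87 control word layout):
   bits 10-11 form the rounding-control field and bit 5 is the precision
   exception mask, which is what the constants above encode:

     0x0c00  RC = 11  round toward zero      (SLOT_CW_TRUNC)
     0x0400  RC = 01  round toward -infinity (SLOT_CW_FLOOR)
     0x0800  RC = 10  round toward +infinity (SLOT_CW_CEIL)
     0x0020  PM = 1   mask the precision exception (SLOT_CW_MASK_PM)

   E.g. starting from the default control word 0x037f, OR-ing in 0x0c00
   gives 0x0f7f, the truncation mode used by the trunc?f?i patterns.  */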
17094 /* Emit vzeroupper. */
17096 void
17097 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
17099 int i;
17101 /* Cancel automatic vzeroupper insertion if there are
17102 live call-saved SSE registers at the insertion point. */
17104 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17105 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17106 return;
17108 if (TARGET_64BIT)
17109 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17110 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17111 return;
17113 emit_insn (gen_avx_vzeroupper ());
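/* Illustrative sketch (hedged, not from the original sources): the
   typical insertion point is before a call into code that may execute
   legacy SSE instructions, to avoid AVX/SSE transition penalties, e.g.

     vaddps  %ymm1, %ymm0, %ymm0
     vzeroupper
     call    legacy_sse_routine        # hypothetical callee

   The early returns above cancel the insertion when a call-saved SSE
   register is live, since vzeroupper would clobber its upper half.  */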
17118 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
17119 is the set of hard registers live at the point where the insn(s)
17120 are to be inserted. */
17122 static void
17123 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17124 HARD_REG_SET regs_live)
17126 switch (entity)
17128 case AVX_U128:
17129 if (mode == AVX_U128_CLEAN)
17130 ix86_avx_emit_vzeroupper (regs_live);
17131 break;
17132 case I387_TRUNC:
17133 case I387_FLOOR:
17134 case I387_CEIL:
17135 case I387_MASK_PM:
17136 if (mode != I387_CW_ANY
17137 && mode != I387_CW_UNINITIALIZED)
17138 emit_i387_cw_initialization (mode);
17139 break;
17140 default:
17141 gcc_unreachable ();
17145 /* Output code for INSN to convert a float to a signed int. OPERANDS
17146 are the insn operands. The output may be [HSD]Imode and the input
17147 operand may be [SDX]Fmode. */
17149 const char *
17150 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17152 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17153 int dimode_p = GET_MODE (operands[0]) == DImode;
17154 int round_mode = get_attr_i387_cw (insn);
17156 /* Jump through a hoop or two for DImode, since the hardware has no
17157 non-popping instruction. We used to do this a different way, but
17158 that was somewhat fragile and broke with post-reload splitters. */
17159 if ((dimode_p || fisttp) && !stack_top_dies)
17160 output_asm_insn ("fld\t%y1", operands);
17162 gcc_assert (STACK_TOP_P (operands[1]));
17163 gcc_assert (MEM_P (operands[0]));
17164 gcc_assert (GET_MODE (operands[1]) != TFmode);
17166 if (fisttp)
17167 output_asm_insn ("fisttp%Z0\t%0", operands);
17168 else
17170 if (round_mode != I387_CW_ANY)
17171 output_asm_insn ("fldcw\t%3", operands);
17172 if (stack_top_dies || dimode_p)
17173 output_asm_insn ("fistp%Z0\t%0", operands);
17174 else
17175 output_asm_insn ("fist%Z0\t%0", operands);
17176 if (round_mode != I387_CW_ANY)
17177 output_asm_insn ("fldcw\t%2", operands);
17180 return "";
17183 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17184 have the values zero or one, indicates the ffreep insn's operand
17185 from the OPERANDS array. */
17187 static const char *
17188 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17190 if (TARGET_USE_FFREEP)
17191 #ifdef HAVE_AS_IX86_FFREEP
17192 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17193 #else
17195 static char retval[32];
17196 int regno = REGNO (operands[opno]);
17198 gcc_assert (STACK_REGNO_P (regno));
17200 regno -= FIRST_STACK_REG;
17202 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17203 return retval;
17205 #endif
17207 return opno ? "fstp\t%y1" : "fstp\t%y0";
17211 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17212 should be used. UNORDERED_P is true when fucom should be used. */
17214 const char *
17215 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17217 int stack_top_dies;
17218 rtx cmp_op0, cmp_op1;
17219 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17221 if (eflags_p)
17223 cmp_op0 = operands[0];
17224 cmp_op1 = operands[1];
17226 else
17228 cmp_op0 = operands[1];
17229 cmp_op1 = operands[2];
17232 if (is_sse)
17234 if (GET_MODE (operands[0]) == SFmode)
17235 if (unordered_p)
17236 return "%vucomiss\t{%1, %0|%0, %1}";
17237 else
17238 return "%vcomiss\t{%1, %0|%0, %1}";
17239 else
17240 if (unordered_p)
17241 return "%vucomisd\t{%1, %0|%0, %1}";
17242 else
17243 return "%vcomisd\t{%1, %0|%0, %1}";
17246 gcc_assert (STACK_TOP_P (cmp_op0));
17248 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17250 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17252 if (stack_top_dies)
17254 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17255 return output_387_ffreep (operands, 1);
17257 else
17258 return "ftst\n\tfnstsw\t%0";
17261 if (STACK_REG_P (cmp_op1)
17262 && stack_top_dies
17263 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17264 && REGNO (cmp_op1) != FIRST_STACK_REG)
17266 /* If the top of the 387 stack dies, and the other operand is also
17267 a stack register that dies, then this must be a
17268 `fcompp' float compare. */
17270 if (eflags_p)
17272 /* There is no double popping fcomi variant. Fortunately,
17273 eflags is immune from the fstp's cc clobbering. */
17274 if (unordered_p)
17275 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17276 else
17277 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17278 return output_387_ffreep (operands, 0);
17280 else
17282 if (unordered_p)
17283 return "fucompp\n\tfnstsw\t%0";
17284 else
17285 return "fcompp\n\tfnstsw\t%0";
17288 else
17290 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17292 static const char * const alt[16] =
17294 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17295 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17296 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17297 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17299 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17300 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17301 NULL,
17302 NULL,
17304 "fcomi\t{%y1, %0|%0, %y1}",
17305 "fcomip\t{%y1, %0|%0, %y1}",
17306 "fucomi\t{%y1, %0|%0, %y1}",
17307 "fucomip\t{%y1, %0|%0, %y1}",
17309 NULL,
17310 NULL,
17311 NULL,
17312 NULL
17315 int mask;
17316 const char *ret;
17318 mask = eflags_p << 3;
17319 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17320 mask |= unordered_p << 1;
17321 mask |= stack_top_dies;
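/* Worked example (illustrative): for an fcomi-style compare of two
   stack registers where the top of stack dies and the compare is
   unordered, mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, which
   selects "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */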
17323 gcc_assert (mask < 16);
17324 ret = alt[mask];
17325 gcc_assert (ret);
17327 return ret;
17331 void
17332 ix86_output_addr_vec_elt (FILE *file, int value)
17334 const char *directive = ASM_LONG;
17336 #ifdef ASM_QUAD
17337 if (TARGET_LP64)
17338 directive = ASM_QUAD;
17339 #else
17340 gcc_assert (!TARGET_64BIT);
17341 #endif
17343 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17346 void
17347 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17349 const char *directive = ASM_LONG;
17351 #ifdef ASM_QUAD
17352 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17353 directive = ASM_QUAD;
17354 #else
17355 gcc_assert (!TARGET_64BIT);
17356 #endif
17357 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17358 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17359 fprintf (file, "%s%s%d-%s%d\n",
17360 directive, LPREFIX, value, LPREFIX, rel);
17361 else if (HAVE_AS_GOTOFF_IN_DATA)
17362 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17363 #if TARGET_MACHO
17364 else if (TARGET_MACHO)
17366 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17367 machopic_output_function_base_name (file);
17368 putc ('\n', file);
17370 #endif
17371 else
17372 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17373 GOT_SYMBOL_NAME, LPREFIX, value);
17376 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17377 for the target. */
17379 void
17380 ix86_expand_clear (rtx dest)
17382 rtx tmp;
17384 /* We play register width games, which are only valid after reload. */
17385 gcc_assert (reload_completed);
17387 /* Avoid HImode and its attendant prefix byte. */
17388 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17389 dest = gen_rtx_REG (SImode, REGNO (dest));
17390 tmp = gen_rtx_SET (dest, const0_rtx);
17392 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17394 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17395 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17398 emit_insn (tmp);
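/* Illustrative note (hedged): for SImode this chooses between

     movl  $0, %eax        # 5 bytes, leaves the flags untouched
     xorl  %eax, %eax      # 2 bytes, clobbers the flags

   The PARALLEL with the CCmode clobber above models the flag clobber of
   the xor form, which is used unless TARGET_USE_MOV0 requests the mov
   form and we are not optimizing for size.  */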
17401 /* X is an unchanging MEM. If it is a constant pool reference, return
17402 the constant pool rtx, else NULL. */
17405 maybe_get_pool_constant (rtx x)
17407 x = ix86_delegitimize_address (XEXP (x, 0));
17409 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17410 return get_pool_constant (x);
17412 return NULL_RTX;
17415 void
17416 ix86_expand_move (machine_mode mode, rtx operands[])
17418 rtx op0, op1;
17419 enum tls_model model;
17421 op0 = operands[0];
17422 op1 = operands[1];
17424 if (GET_CODE (op1) == SYMBOL_REF)
17426 rtx tmp;
17428 model = SYMBOL_REF_TLS_MODEL (op1);
17429 if (model)
17431 op1 = legitimize_tls_address (op1, model, true);
17432 op1 = force_operand (op1, op0);
17433 if (op1 == op0)
17434 return;
17435 op1 = convert_to_mode (mode, op1, 1);
17437 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17438 op1 = tmp;
17440 else if (GET_CODE (op1) == CONST
17441 && GET_CODE (XEXP (op1, 0)) == PLUS
17442 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17444 rtx addend = XEXP (XEXP (op1, 0), 1);
17445 rtx symbol = XEXP (XEXP (op1, 0), 0);
17446 rtx tmp;
17448 model = SYMBOL_REF_TLS_MODEL (symbol);
17449 if (model)
17450 tmp = legitimize_tls_address (symbol, model, true);
17451 else
17452 tmp = legitimize_pe_coff_symbol (symbol, true);
17454 if (tmp)
17456 tmp = force_operand (tmp, NULL);
17457 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17458 op0, 1, OPTAB_DIRECT);
17459 if (tmp == op0)
17460 return;
17461 op1 = convert_to_mode (mode, tmp, 1);
17465 if ((flag_pic || MACHOPIC_INDIRECT)
17466 && symbolic_operand (op1, mode))
17468 if (TARGET_MACHO && !TARGET_64BIT)
17470 #if TARGET_MACHO
17471 /* dynamic-no-pic */
17472 if (MACHOPIC_INDIRECT)
17474 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
17475 ? op0 : gen_reg_rtx (Pmode);
17476 op1 = machopic_indirect_data_reference (op1, temp);
17477 if (MACHOPIC_PURE)
17478 op1 = machopic_legitimize_pic_address (op1, mode,
17479 temp == op1 ? 0 : temp);
17481 if (op0 != op1 && GET_CODE (op0) != MEM)
17483 rtx insn = gen_rtx_SET (op0, op1);
17484 emit_insn (insn);
17485 return;
17487 if (GET_CODE (op0) == MEM)
17488 op1 = force_reg (Pmode, op1);
17489 else
17491 rtx temp = op0;
17492 if (GET_CODE (temp) != REG)
17493 temp = gen_reg_rtx (Pmode);
17494 temp = legitimize_pic_address (op1, temp);
17495 if (temp == op0)
17496 return;
17497 op1 = temp;
17499 /* dynamic-no-pic */
17500 #endif
17502 else
17504 if (MEM_P (op0))
17505 op1 = force_reg (mode, op1);
17506 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17508 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17509 op1 = legitimize_pic_address (op1, reg);
17510 if (op0 == op1)
17511 return;
17512 op1 = convert_to_mode (mode, op1, 1);
17516 else
17518 if (MEM_P (op0)
17519 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17520 || !push_operand (op0, mode))
17521 && MEM_P (op1))
17522 op1 = force_reg (mode, op1);
17524 if (push_operand (op0, mode)
17525 && ! general_no_elim_operand (op1, mode))
17526 op1 = copy_to_mode_reg (mode, op1);
17528 /* Force large constants in 64bit compilation into a register
17529 to get them CSEed. */
17530 if (can_create_pseudo_p ()
17531 && (mode == DImode) && TARGET_64BIT
17532 && immediate_operand (op1, mode)
17533 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17534 && !register_operand (op0, mode)
17535 && optimize)
17536 op1 = copy_to_mode_reg (mode, op1);
17538 if (can_create_pseudo_p ()
17539 && CONST_DOUBLE_P (op1))
17541 /* If we are loading a floating point constant to a register,
17542 force the value to memory now, since we'll get better code
17543 out the back end. */
17545 op1 = validize_mem (force_const_mem (mode, op1));
17546 if (!register_operand (op0, mode))
17548 rtx temp = gen_reg_rtx (mode);
17549 emit_insn (gen_rtx_SET (temp, op1));
17550 emit_move_insn (op0, temp);
17551 return;
17556 emit_insn (gen_rtx_SET (op0, op1));
17559 void
17560 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17562 rtx op0 = operands[0], op1 = operands[1];
17563 unsigned int align = GET_MODE_ALIGNMENT (mode);
17565 if (push_operand (op0, VOIDmode))
17566 op0 = emit_move_resolve_push (mode, op0);
17568 /* Force constants other than zero into memory. We do not know how
17569 the instructions used to build constants modify the upper 64 bits
17570 of the register; once we have that information we may be able
17571 to handle some of them more efficiently. */
17572 if (can_create_pseudo_p ()
17573 && register_operand (op0, mode)
17574 && (CONSTANT_P (op1)
17575 || (SUBREG_P (op1)
17576 && CONSTANT_P (SUBREG_REG (op1))))
17577 && !standard_sse_constant_p (op1))
17578 op1 = validize_mem (force_const_mem (mode, op1));
17580 /* We need to check memory alignment for SSE mode since attribute
17581 can make operands unaligned. */
17582 if (can_create_pseudo_p ()
17583 && SSE_REG_MODE_P (mode)
17584 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17585 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17587 rtx tmp[2];
17589 /* ix86_expand_vector_move_misalign() does not like constants ... */
17590 if (CONSTANT_P (op1)
17591 || (SUBREG_P (op1)
17592 && CONSTANT_P (SUBREG_REG (op1))))
17593 op1 = validize_mem (force_const_mem (mode, op1));
17595 /* ... nor both arguments in memory. */
17596 if (!register_operand (op0, mode)
17597 && !register_operand (op1, mode))
17598 op1 = force_reg (mode, op1);
17600 tmp[0] = op0; tmp[1] = op1;
17601 ix86_expand_vector_move_misalign (mode, tmp);
17602 return;
17605 /* Make operand1 a register if it isn't already. */
17606 if (can_create_pseudo_p ()
17607 && !register_operand (op0, mode)
17608 && !register_operand (op1, mode))
17610 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17611 return;
17614 emit_insn (gen_rtx_SET (op0, op1));
17617 /* Split 32-byte AVX unaligned load and store if needed. */
17619 static void
17620 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17622 rtx m;
17623 rtx (*extract) (rtx, rtx, rtx);
17624 rtx (*load_unaligned) (rtx, rtx);
17625 rtx (*store_unaligned) (rtx, rtx);
17626 machine_mode mode;
17628 switch (GET_MODE (op0))
17630 default:
17631 gcc_unreachable ();
17632 case V32QImode:
17633 extract = gen_avx_vextractf128v32qi;
17634 load_unaligned = gen_avx_loaddquv32qi;
17635 store_unaligned = gen_avx_storedquv32qi;
17636 mode = V16QImode;
17637 break;
17638 case V8SFmode:
17639 extract = gen_avx_vextractf128v8sf;
17640 load_unaligned = gen_avx_loadups256;
17641 store_unaligned = gen_avx_storeups256;
17642 mode = V4SFmode;
17643 break;
17644 case V4DFmode:
17645 extract = gen_avx_vextractf128v4df;
17646 load_unaligned = gen_avx_loadupd256;
17647 store_unaligned = gen_avx_storeupd256;
17648 mode = V2DFmode;
17649 break;
17652 if (MEM_P (op1))
17654 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17655 && optimize_insn_for_speed_p ())
17657 rtx r = gen_reg_rtx (mode);
17658 m = adjust_address (op1, mode, 0);
17659 emit_move_insn (r, m);
17660 m = adjust_address (op1, mode, 16);
17661 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17662 emit_move_insn (op0, r);
17664 /* Normal *mov<mode>_internal pattern will handle
17665 unaligned loads just fine if misaligned_operand
17666 is true, and without the UNSPEC it can be combined
17667 with arithmetic instructions. */
17668 else if (misaligned_operand (op1, GET_MODE (op1)))
17669 emit_insn (gen_rtx_SET (op0, op1));
17670 else
17671 emit_insn (load_unaligned (op0, op1));
17673 else if (MEM_P (op0))
17675 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17676 && optimize_insn_for_speed_p ())
17678 m = adjust_address (op0, mode, 0);
17679 emit_insn (extract (m, op1, const0_rtx));
17680 m = adjust_address (op0, mode, 16);
17681 emit_insn (extract (m, op1, const1_rtx));
17683 else
17684 emit_insn (store_unaligned (op0, op1));
17686 else
17687 gcc_unreachable ();
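/* Illustrative sketch (hedged): with TARGET_AVX256_SPLIT_UNALIGNED_STORE
   in effect, a 32-byte unaligned store is performed as two 16-byte
   halves at offsets 0 and 16, roughly

     vextractf128  $0x0, %ymm0, (%rax)
     vextractf128  $0x1, %ymm0, 16(%rax)

   while the split load path builds the 256bit value from two 16-byte
   loads via VEC_CONCAT (typically a 128bit load of the low half followed
   by an insert of the high half).  */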
17690 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17691 straight to ix86_expand_vector_move. */
17692 /* Code generation for scalar reg-reg moves of single and double precision data:
17693 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17694 movaps reg, reg
17695 else
17696 movss reg, reg
17697 if (x86_sse_partial_reg_dependency == true)
17698 movapd reg, reg
17699 else
17700 movsd reg, reg
17702 Code generation for scalar loads of double precision data:
17703 if (x86_sse_split_regs == true)
17704 movlpd mem, reg (gas syntax)
17705 else
17706 movsd mem, reg
17708 Code generation for unaligned packed loads of single precision data
17709 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17710 if (x86_sse_unaligned_move_optimal)
17711 movups mem, reg
17713 if (x86_sse_partial_reg_dependency == true)
17715 xorps reg, reg
17716 movlps mem, reg
17717 movhps mem+8, reg
17719 else
17721 movlps mem, reg
17722 movhps mem+8, reg
17725 Code generation for unaligned packed loads of double precision data
17726 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17727 if (x86_sse_unaligned_move_optimal)
17728 movupd mem, reg
17730 if (x86_sse_split_regs == true)
17732 movlpd mem, reg
17733 movhpd mem+8, reg
17735 else
17737 movsd mem, reg
17738 movhpd mem+8, reg
17742 void
17743 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17745 rtx op0, op1, orig_op0 = NULL_RTX, m;
17746 rtx (*load_unaligned) (rtx, rtx);
17747 rtx (*store_unaligned) (rtx, rtx);
17749 op0 = operands[0];
17750 op1 = operands[1];
17752 if (GET_MODE_SIZE (mode) == 64)
17754 switch (GET_MODE_CLASS (mode))
17756 case MODE_VECTOR_INT:
17757 case MODE_INT:
17758 if (GET_MODE (op0) != V16SImode)
17760 if (!MEM_P (op0))
17762 orig_op0 = op0;
17763 op0 = gen_reg_rtx (V16SImode);
17765 else
17766 op0 = gen_lowpart (V16SImode, op0);
17768 op1 = gen_lowpart (V16SImode, op1);
17769 /* FALLTHRU */
17771 case MODE_VECTOR_FLOAT:
17772 switch (GET_MODE (op0))
17774 default:
17775 gcc_unreachable ();
17776 case V16SImode:
17777 load_unaligned = gen_avx512f_loaddquv16si;
17778 store_unaligned = gen_avx512f_storedquv16si;
17779 break;
17780 case V16SFmode:
17781 load_unaligned = gen_avx512f_loadups512;
17782 store_unaligned = gen_avx512f_storeups512;
17783 break;
17784 case V8DFmode:
17785 load_unaligned = gen_avx512f_loadupd512;
17786 store_unaligned = gen_avx512f_storeupd512;
17787 break;
17790 if (MEM_P (op1))
17791 emit_insn (load_unaligned (op0, op1));
17792 else if (MEM_P (op0))
17793 emit_insn (store_unaligned (op0, op1));
17794 else
17795 gcc_unreachable ();
17796 if (orig_op0)
17797 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17798 break;
17800 default:
17801 gcc_unreachable ();
17804 return;
17807 if (TARGET_AVX
17808 && GET_MODE_SIZE (mode) == 32)
17810 switch (GET_MODE_CLASS (mode))
17812 case MODE_VECTOR_INT:
17813 case MODE_INT:
17814 if (GET_MODE (op0) != V32QImode)
17816 if (!MEM_P (op0))
17818 orig_op0 = op0;
17819 op0 = gen_reg_rtx (V32QImode);
17821 else
17822 op0 = gen_lowpart (V32QImode, op0);
17824 op1 = gen_lowpart (V32QImode, op1);
17825 /* FALLTHRU */
17827 case MODE_VECTOR_FLOAT:
17828 ix86_avx256_split_vector_move_misalign (op0, op1);
17829 if (orig_op0)
17830 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17831 break;
17833 default:
17834 gcc_unreachable ();
17837 return;
17840 if (MEM_P (op1))
17842 /* Normal *mov<mode>_internal pattern will handle
17843 unaligned loads just fine if misaligned_operand
17844 is true, and without the UNSPEC it can be combined
17845 with arithmetic instructions. */
17846 if (TARGET_AVX
17847 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17848 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17849 && misaligned_operand (op1, GET_MODE (op1)))
17850 emit_insn (gen_rtx_SET (op0, op1));
17851 /* ??? If we have typed data, then it would appear that using
17852 movdqu is the only way to get unaligned data loaded with
17853 integer type. */
17854 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17856 if (GET_MODE (op0) != V16QImode)
17858 orig_op0 = op0;
17859 op0 = gen_reg_rtx (V16QImode);
17861 op1 = gen_lowpart (V16QImode, op1);
17862 /* We will eventually emit movups based on insn attributes. */
17863 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17864 if (orig_op0)
17865 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17867 else if (TARGET_SSE2 && mode == V2DFmode)
17869 rtx zero;
17871 if (TARGET_AVX
17872 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17873 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17874 || optimize_insn_for_size_p ())
17876 /* We will eventually emit movups based on insn attributes. */
17877 emit_insn (gen_sse2_loadupd (op0, op1));
17878 return;
17881 /* When SSE registers are split into halves, we can avoid
17882 writing to the top half twice. */
17883 if (TARGET_SSE_SPLIT_REGS)
17885 emit_clobber (op0);
17886 zero = op0;
17888 else
17890 /* ??? Not sure about the best option for the Intel chips.
17891 The following would seem to satisfy; the register is
17892 entirely cleared, breaking the dependency chain. We
17893 then store to the upper half, with a dependency depth
17894 of one. A rumor has it that Intel recommends two movsd
17895 followed by an unpacklpd, but this is unconfirmed. And
17896 given that the dependency depth of the unpacklpd would
17897 still be one, I'm not sure why this would be better. */
17898 zero = CONST0_RTX (V2DFmode);
17901 m = adjust_address (op1, DFmode, 0);
17902 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17903 m = adjust_address (op1, DFmode, 8);
17904 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17906 else
17908 rtx t;
17910 if (TARGET_AVX
17911 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17912 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17913 || optimize_insn_for_size_p ())
17915 if (GET_MODE (op0) != V4SFmode)
17917 orig_op0 = op0;
17918 op0 = gen_reg_rtx (V4SFmode);
17920 op1 = gen_lowpart (V4SFmode, op1);
17921 emit_insn (gen_sse_loadups (op0, op1));
17922 if (orig_op0)
17923 emit_move_insn (orig_op0,
17924 gen_lowpart (GET_MODE (orig_op0), op0));
17925 return;
17928 if (mode != V4SFmode)
17929 t = gen_reg_rtx (V4SFmode);
17930 else
17931 t = op0;
17933 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17934 emit_move_insn (t, CONST0_RTX (V4SFmode));
17935 else
17936 emit_clobber (t);
17938 m = adjust_address (op1, V2SFmode, 0);
17939 emit_insn (gen_sse_loadlps (t, t, m));
17940 m = adjust_address (op1, V2SFmode, 8);
17941 emit_insn (gen_sse_loadhps (t, t, m));
17942 if (mode != V4SFmode)
17943 emit_move_insn (op0, gen_lowpart (mode, t));
17946 else if (MEM_P (op0))
17948 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17950 op0 = gen_lowpart (V16QImode, op0);
17951 op1 = gen_lowpart (V16QImode, op1);
17952 /* We will eventually emit movups based on insn attributes. */
17953 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17955 else if (TARGET_SSE2 && mode == V2DFmode)
17957 if (TARGET_AVX
17958 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17959 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17960 || optimize_insn_for_size_p ())
17961 /* We will eventually emit movups based on insn attributes. */
17962 emit_insn (gen_sse2_storeupd (op0, op1));
17963 else
17965 m = adjust_address (op0, DFmode, 0);
17966 emit_insn (gen_sse2_storelpd (m, op1));
17967 m = adjust_address (op0, DFmode, 8);
17968 emit_insn (gen_sse2_storehpd (m, op1));
17971 else
17973 if (mode != V4SFmode)
17974 op1 = gen_lowpart (V4SFmode, op1);
17976 if (TARGET_AVX
17977 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17978 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17979 || optimize_insn_for_size_p ())
17981 op0 = gen_lowpart (V4SFmode, op0);
17982 emit_insn (gen_sse_storeups (op0, op1));
17984 else
17986 m = adjust_address (op0, V2SFmode, 0);
17987 emit_insn (gen_sse_storelps (m, op1));
17988 m = adjust_address (op0, V2SFmode, 8);
17989 emit_insn (gen_sse_storehps (m, op1));
17993 else
17994 gcc_unreachable ();
17997 /* Helper function of ix86_fixup_binary_operands to canonicalize
17998 operand order. Returns true if the operands should be swapped. */
18000 static bool
18001 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
18002 rtx operands[])
18004 rtx dst = operands[0];
18005 rtx src1 = operands[1];
18006 rtx src2 = operands[2];
18008 /* If the operation is not commutative, we can't do anything. */
18009 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
18010 return false;
18012 /* Highest priority is that src1 should match dst. */
18013 if (rtx_equal_p (dst, src1))
18014 return false;
18015 if (rtx_equal_p (dst, src2))
18016 return true;
18018 /* Next highest priority is that immediate constants come second. */
18019 if (immediate_operand (src2, mode))
18020 return false;
18021 if (immediate_operand (src1, mode))
18022 return true;
18024 /* Lowest priority is that memory references should come second. */
18025 if (MEM_P (src2))
18026 return false;
18027 if (MEM_P (src1))
18028 return true;
18030 return false;
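/* Illustrative example (hedged): for a commutative PLUS with operands
   (dst = r1, src1 = mem, src2 = r1) the rules above request a swap, so
   src1 matches dst and the two-address form "addl mem, %r1" can be
   used; without the swap a separate copy would be needed.  */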
18034 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
18035 destination to use for the operation. If different from the true
18036 destination in operands[0], a copy operation will be required. */
18039 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
18040 rtx operands[])
18042 rtx dst = operands[0];
18043 rtx src1 = operands[1];
18044 rtx src2 = operands[2];
18046 /* Canonicalize operand order. */
18047 if (ix86_swap_binary_operands_p (code, mode, operands))
18049 /* It is invalid to swap operands of different modes. */
18050 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
18052 std::swap (src1, src2);
18055 /* Both source operands cannot be in memory. */
18056 if (MEM_P (src1) && MEM_P (src2))
18058 /* Optimization: Only read from memory once. */
18059 if (rtx_equal_p (src1, src2))
18061 src2 = force_reg (mode, src2);
18062 src1 = src2;
18064 else if (rtx_equal_p (dst, src1))
18065 src2 = force_reg (mode, src2);
18066 else
18067 src1 = force_reg (mode, src1);
18070 /* If the destination is memory, and we do not have matching source
18071 operands, do things in registers. */
18072 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18073 dst = gen_reg_rtx (mode);
18075 /* Source 1 cannot be a constant. */
18076 if (CONSTANT_P (src1))
18077 src1 = force_reg (mode, src1);
18079 /* Source 1 cannot be a non-matching memory. */
18080 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18081 src1 = force_reg (mode, src1);
18083 /* Improve address combine. */
18084 if (code == PLUS
18085 && GET_MODE_CLASS (mode) == MODE_INT
18086 && MEM_P (src2))
18087 src2 = force_reg (mode, src2);
18089 operands[1] = src1;
18090 operands[2] = src2;
18091 return dst;
18094 /* Similarly, but assume that the destination has already been
18095 set up properly. */
18097 void
18098 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
18099 machine_mode mode, rtx operands[])
18101 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18102 gcc_assert (dst == operands[0]);
18105 /* Attempt to expand a binary operator. Make the expansion closer to the
18106 actual machine than just general_operand, which will allow 3 separate
18107 memory references (one output, two input) in a single insn. */
18109 void
18110 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18111 rtx operands[])
18113 rtx src1, src2, dst, op, clob;
18115 dst = ix86_fixup_binary_operands (code, mode, operands);
18116 src1 = operands[1];
18117 src2 = operands[2];
18119 /* Emit the instruction. */
18121 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18123 if (reload_completed
18124 && code == PLUS
18125 && !rtx_equal_p (dst, src1))
18127 /* This is going to be an LEA; avoid splitting it later. */
18128 emit_insn (op);
18130 else
18132 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18133 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18136 /* Fix up the destination if needed. */
18137 if (dst != operands[0])
18138 emit_move_insn (operands[0], dst);
18141 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18142 the given OPERANDS. */
18144 void
18145 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18146 rtx operands[])
18148 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18149 if (SUBREG_P (operands[1]))
18151 op1 = operands[1];
18152 op2 = operands[2];
18154 else if (SUBREG_P (operands[2]))
18156 op1 = operands[2];
18157 op2 = operands[1];
18159 /* Optimize (__m128i) d | (__m128i) e and similar code
18160 when d and e are float vectors into float vector logical
18161 insn. In C/C++ without using intrinsics there is no other way
18162 to express vector logical operation on float vectors than
18163 to cast them temporarily to integer vectors. */
18164 if (op1
18165 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18166 && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
18167 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18168 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18169 && SUBREG_BYTE (op1) == 0
18170 && (GET_CODE (op2) == CONST_VECTOR
18171 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18172 && SUBREG_BYTE (op2) == 0))
18173 && can_create_pseudo_p ())
18175 rtx dst;
18176 switch (GET_MODE (SUBREG_REG (op1)))
18178 case V4SFmode:
18179 case V8SFmode:
18180 case V16SFmode:
18181 case V2DFmode:
18182 case V4DFmode:
18183 case V8DFmode:
18184 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18185 if (GET_CODE (op2) == CONST_VECTOR)
18187 op2 = gen_lowpart (GET_MODE (dst), op2);
18188 op2 = force_reg (GET_MODE (dst), op2);
18190 else
18192 op1 = operands[1];
18193 op2 = SUBREG_REG (operands[2]);
18194 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18195 op2 = force_reg (GET_MODE (dst), op2);
18197 op1 = SUBREG_REG (op1);
18198 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18199 op1 = force_reg (GET_MODE (dst), op1);
18200 emit_insn (gen_rtx_SET (dst,
18201 gen_rtx_fmt_ee (code, GET_MODE (dst),
18202 op1, op2)));
18203 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18204 return;
18205 default:
18206 break;
18209 if (!nonimmediate_operand (operands[1], mode))
18210 operands[1] = force_reg (mode, operands[1]);
18211 if (!nonimmediate_operand (operands[2], mode))
18212 operands[2] = force_reg (mode, operands[2]);
18213 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18214 emit_insn (gen_rtx_SET (operands[0],
18215 gen_rtx_fmt_ee (code, mode, operands[1],
18216 operands[2])));
18219 /* Return TRUE or FALSE depending on whether the binary operator meets the
18220 appropriate constraints. */
18222 bool
18223 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18224 rtx operands[3])
18226 rtx dst = operands[0];
18227 rtx src1 = operands[1];
18228 rtx src2 = operands[2];
18230 /* Both source operands cannot be in memory. */
18231 if (MEM_P (src1) && MEM_P (src2))
18232 return false;
18234 /* Canonicalize operand order for commutative operators. */
18235 if (ix86_swap_binary_operands_p (code, mode, operands))
18236 std::swap (src1, src2);
18238 /* If the destination is memory, we must have a matching source operand. */
18239 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18240 return false;
18242 /* Source 1 cannot be a constant. */
18243 if (CONSTANT_P (src1))
18244 return false;
18246 /* Source 1 cannot be a non-matching memory. */
18247 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18248 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18249 return (code == AND
18250 && (mode == HImode
18251 || mode == SImode
18252 || (TARGET_64BIT && mode == DImode))
18253 && satisfies_constraint_L (src2));
18255 return true;
18258 /* Attempt to expand a unary operator. Make the expansion closer to the
18259 actual machine than just general_operand, which will allow 2 separate
18260 memory references (one output, one input) in a single insn. */
18262 void
18263 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18264 rtx operands[])
18266 bool matching_memory = false;
18267 rtx src, dst, op, clob;
18269 dst = operands[0];
18270 src = operands[1];
18272 /* If the destination is memory, and we do not have matching source
18273 operands, do things in registers. */
18274 if (MEM_P (dst))
18276 if (rtx_equal_p (dst, src))
18277 matching_memory = true;
18278 else
18279 dst = gen_reg_rtx (mode);
18282 /* When source operand is memory, destination must match. */
18283 if (MEM_P (src) && !matching_memory)
18284 src = force_reg (mode, src);
18286 /* Emit the instruction. */
18288 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
18290 if (code == NOT)
18291 emit_insn (op);
18292 else
18294 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18295 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18298 /* Fix up the destination if needed. */
18299 if (dst != operands[0])
18300 emit_move_insn (operands[0], dst);
18303 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18304 divisor are within the range [0-255]. */
18306 void
18307 ix86_split_idivmod (machine_mode mode, rtx operands[],
18308 bool signed_p)
18310 rtx_code_label *end_label, *qimode_label;
18311 rtx insn, div, mod;
18312 rtx scratch, tmp0, tmp1, tmp2;
18313 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18314 rtx (*gen_zero_extend) (rtx, rtx);
18315 rtx (*gen_test_ccno_1) (rtx, rtx);
18317 switch (mode)
18319 case SImode:
18320 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18321 gen_test_ccno_1 = gen_testsi_ccno_1;
18322 gen_zero_extend = gen_zero_extendqisi2;
18323 break;
18324 case DImode:
18325 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18326 gen_test_ccno_1 = gen_testdi_ccno_1;
18327 gen_zero_extend = gen_zero_extendqidi2;
18328 break;
18329 default:
18330 gcc_unreachable ();
18333 end_label = gen_label_rtx ();
18334 qimode_label = gen_label_rtx ();
18336 scratch = gen_reg_rtx (mode);
18338 /* Use 8bit unsigned divmod if dividend and divisor are within
18339 the range [0-255]. */
18340 emit_move_insn (scratch, operands[2]);
18341 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18342 scratch, 1, OPTAB_DIRECT);
18343 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18344 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18345 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18346 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18347 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18348 pc_rtx);
18349 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
18350 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18351 JUMP_LABEL (insn) = qimode_label;
18353 /* Generate the original signed/unsigned divmod. */
18354 div = gen_divmod4_1 (operands[0], operands[1],
18355 operands[2], operands[3]);
18356 emit_insn (div);
18358 /* Branch to the end. */
18359 emit_jump_insn (gen_jump (end_label));
18360 emit_barrier ();
18362 /* Generate 8bit unsigned divide. */
18363 emit_label (qimode_label);
18364 /* Don't use operands[0] for result of 8bit divide since not all
18365 registers support QImode ZERO_EXTRACT. */
18366 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18367 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18368 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18369 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18371 if (signed_p)
18373 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18374 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18376 else
18378 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18379 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18382 /* Extract remainder from AH. */
18383 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18384 if (REG_P (operands[1]))
18385 insn = emit_move_insn (operands[1], tmp1);
18386 else
18388 /* Need a new scratch register since the old one has result
18389 of 8bit divide. */
18390 scratch = gen_reg_rtx (mode);
18391 emit_move_insn (scratch, tmp1);
18392 insn = emit_move_insn (operands[1], scratch);
18394 set_unique_reg_note (insn, REG_EQUAL, mod);
18396 /* Zero extend quotient from AL. */
18397 tmp1 = gen_lowpart (QImode, tmp0);
18398 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18399 set_unique_reg_note (insn, REG_EQUAL, div);
18401 emit_label (end_label);
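/* Illustrative sketch (hedged, C-like view of the control flow emitted
   above; names follow the operands of the divmod expander):

     scratch = operands[2] | operands[3];
     if ((scratch & ~0xff) == 0)          // dividend and divisor fit in 8 bits
       // 16bit-by-8bit unsigned divide: quotient in AL, remainder in AH,
       // then zero-extended into operands[0]/operands[1]
       do_8bit_udivmod ();
     else
       do_full_divmod ();                 // original signed/unsigned path

   The branch to the 8bit path is predicted 50/50 via predict_jump.  */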
18404 #define LEA_MAX_STALL (3)
18405 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18407 /* Increase given DISTANCE in half-cycles according to
18408 dependencies between PREV and NEXT instructions.
18409 Add 1 half-cycle if there is no dependency and
18410 go to the next cycle if there is some dependency. */
18412 static unsigned int
18413 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18415 df_ref def, use;
18417 if (!prev || !next)
18418 return distance + (distance & 1) + 2;
18420 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18421 return distance + 1;
18423 FOR_EACH_INSN_USE (use, next)
18424 FOR_EACH_INSN_DEF (def, prev)
18425 if (!DF_REF_IS_ARTIFICIAL (def)
18426 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18427 return distance + (distance & 1) + 2;
18429 return distance + 1;
18432 /* Function checks if instruction INSN defines register number
18433 REGNO1 or REGNO2. */
18435 static bool
18436 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18437 rtx_insn *insn)
18439 df_ref def;
18441 FOR_EACH_INSN_DEF (def, insn)
18442 if (DF_REF_REG_DEF_P (def)
18443 && !DF_REF_IS_ARTIFICIAL (def)
18444 && (regno1 == DF_REF_REGNO (def)
18445 || regno2 == DF_REF_REGNO (def)))
18446 return true;
18448 return false;
18451 /* Function checks if instruction INSN uses register number
18452 REGNO as a part of address expression. */
18454 static bool
18455 insn_uses_reg_mem (unsigned int regno, rtx insn)
18457 df_ref use;
18459 FOR_EACH_INSN_USE (use, insn)
18460 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18461 return true;
18463 return false;
18466 /* Search backward for non-agu definition of register number REGNO1
18467 or register number REGNO2 in basic block starting from instruction
18468 START up to head of basic block or instruction INSN.
18470 The function sets *FOUND to true if a definition was found
18471 and to false otherwise.
18473 Distance in half-cycles between START and found instruction or head
18474 of BB is added to DISTANCE and returned. */
18476 static int
18477 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18478 rtx_insn *insn, int distance,
18479 rtx_insn *start, bool *found)
18481 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18482 rtx_insn *prev = start;
18483 rtx_insn *next = NULL;
18485 *found = false;
18487 while (prev
18488 && prev != insn
18489 && distance < LEA_SEARCH_THRESHOLD)
18491 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18493 distance = increase_distance (prev, next, distance);
18494 if (insn_defines_reg (regno1, regno2, prev))
18496 if (recog_memoized (prev) < 0
18497 || get_attr_type (prev) != TYPE_LEA)
18499 *found = true;
18500 return distance;
18504 next = prev;
18506 if (prev == BB_HEAD (bb))
18507 break;
18509 prev = PREV_INSN (prev);
18512 return distance;
18515 /* Search backward for non-agu definition of register number REGNO1
18516 or register number REGNO2 in INSN's basic block until
18517 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18518 2. Reach a neighbouring BB's boundary, or
18519 3. Reach agu definition.
18520 Returns the distance between the non-agu definition point and INSN.
18521 If no definition point, returns -1. */
18523 static int
18524 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18525 rtx_insn *insn)
18527 basic_block bb = BLOCK_FOR_INSN (insn);
18528 int distance = 0;
18529 bool found = false;
18531 if (insn != BB_HEAD (bb))
18532 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18533 distance, PREV_INSN (insn),
18534 &found);
18536 if (!found && distance < LEA_SEARCH_THRESHOLD)
18538 edge e;
18539 edge_iterator ei;
18540 bool simple_loop = false;
18542 FOR_EACH_EDGE (e, ei, bb->preds)
18543 if (e->src == bb)
18545 simple_loop = true;
18546 break;
18549 if (simple_loop)
18550 distance = distance_non_agu_define_in_bb (regno1, regno2,
18551 insn, distance,
18552 BB_END (bb), &found);
18553 else
18555 int shortest_dist = -1;
18556 bool found_in_bb = false;
18558 FOR_EACH_EDGE (e, ei, bb->preds)
18560 int bb_dist
18561 = distance_non_agu_define_in_bb (regno1, regno2,
18562 insn, distance,
18563 BB_END (e->src),
18564 &found_in_bb);
18565 if (found_in_bb)
18567 if (shortest_dist < 0)
18568 shortest_dist = bb_dist;
18569 else if (bb_dist > 0)
18570 shortest_dist = MIN (bb_dist, shortest_dist);
18572 found = true;
18576 distance = shortest_dist;
18580 /* get_attr_type may modify recog data. We want to make sure
18581 that recog data is valid for instruction INSN, on which
18582 distance_non_agu_define is called. INSN is unchanged here. */
18583 extract_insn_cached (insn);
18585 if (!found)
18586 return -1;
18588 return distance >> 1;
18591 /* Return the distance in half-cycles between INSN and the next
18592 insn that uses register number REGNO in a memory address, added
18593 to DISTANCE. Return -1 if REGNO is set.
18595 Set *FOUND to true if a register usage was found and
18596 to false otherwise.
18597 Set *REDEFINED to true if a register redefinition was
18598 found and to false otherwise. */
18600 static int
18601 distance_agu_use_in_bb (unsigned int regno,
18602 rtx_insn *insn, int distance, rtx_insn *start,
18603 bool *found, bool *redefined)
18605 basic_block bb = NULL;
18606 rtx_insn *next = start;
18607 rtx_insn *prev = NULL;
18609 *found = false;
18610 *redefined = false;
18612 if (start != NULL_RTX)
18614 bb = BLOCK_FOR_INSN (start);
18615 if (start != BB_HEAD (bb))
18616 /* If insn and start belong to the same bb, set prev to insn,
18617 so the call to increase_distance will increase the distance
18618 between insns by 1. */
18619 prev = insn;
18622 while (next
18623 && next != insn
18624 && distance < LEA_SEARCH_THRESHOLD)
18626 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18628 distance = increase_distance (prev, next, distance);
18629 if (insn_uses_reg_mem (regno, next))
18631 /* Return DISTANCE if OP0 is used in memory
18632 address in NEXT. */
18633 *found = true;
18634 return distance;
18637 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18639 /* Return -1 if OP0 is set in NEXT. */
18640 *redefined = true;
18641 return -1;
18644 prev = next;
18647 if (next == BB_END (bb))
18648 break;
18650 next = NEXT_INSN (next);
18653 return distance;
18656 /* Return the distance between INSN and the next insn that uses
18657 register number REGNO0 in a memory address. Return -1 if no such
18658 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18660 static int
18661 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18663 basic_block bb = BLOCK_FOR_INSN (insn);
18664 int distance = 0;
18665 bool found = false;
18666 bool redefined = false;
18668 if (insn != BB_END (bb))
18669 distance = distance_agu_use_in_bb (regno0, insn, distance,
18670 NEXT_INSN (insn),
18671 &found, &redefined);
18673 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18675 edge e;
18676 edge_iterator ei;
18677 bool simple_loop = false;
18679 FOR_EACH_EDGE (e, ei, bb->succs)
18680 if (e->dest == bb)
18682 simple_loop = true;
18683 break;
18686 if (simple_loop)
18687 distance = distance_agu_use_in_bb (regno0, insn,
18688 distance, BB_HEAD (bb),
18689 &found, &redefined);
18690 else
18692 int shortest_dist = -1;
18693 bool found_in_bb = false;
18694 bool redefined_in_bb = false;
18696 FOR_EACH_EDGE (e, ei, bb->succs)
18698 int bb_dist
18699 = distance_agu_use_in_bb (regno0, insn,
18700 distance, BB_HEAD (e->dest),
18701 &found_in_bb, &redefined_in_bb);
18702 if (found_in_bb)
18704 if (shortest_dist < 0)
18705 shortest_dist = bb_dist;
18706 else if (bb_dist > 0)
18707 shortest_dist = MIN (bb_dist, shortest_dist);
18709 found = true;
18713 distance = shortest_dist;
18717 if (!found || redefined)
18718 return -1;
18720 return distance >> 1;
18723 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18724 there is a dilemma of choosing LEA or ADD.
18725 Negative value: ADD is preferred over LEA
18726 Zero: Neutral
18727 Positive value: LEA is preferred over ADD. */
18728 #define IX86_LEA_PRIORITY 0
18730 /* Return true if using the lea INSN has a performance advantage
18731 over a sequence of instructions. The instruction sequence has
18732 SPLIT_COST cycles higher latency than the lea latency. */
18734 static bool
18735 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18736 unsigned int regno2, int split_cost, bool has_scale)
18738 int dist_define, dist_use;
18740 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18741 non-destructive destination, or for the ability to use
18742 SCALE, the use of LEA is justified. */
18743 if (TARGET_SILVERMONT || TARGET_INTEL)
18745 if (has_scale)
18746 return true;
18747 if (split_cost < 1)
18748 return false;
18749 if (regno0 == regno1 || regno0 == regno2)
18750 return false;
18751 return true;
18754 dist_define = distance_non_agu_define (regno1, regno2, insn);
18755 dist_use = distance_agu_use (regno0, insn);
18757 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18759 /* If there is no non-AGU operand definition, no AGU
18760 operand usage and the split cost is 0, then both the lea
18761 and non-lea variants have the same priority. Currently
18762 we prefer lea for 64-bit code and non-lea for 32-bit
18763 code. */
18764 if (dist_use < 0 && split_cost == 0)
18765 return TARGET_64BIT || IX86_LEA_PRIORITY;
18766 else
18767 return true;
18770 /* With a longer definition distance, lea is preferable.
18771 Here we adjust it to take into account the splitting cost and
18772 the lea priority. */
18773 dist_define += split_cost + IX86_LEA_PRIORITY;
18775 /* If there is no use in a memory address then we just check
18776 that the split cost exceeds the AGU stall. */
18777 if (dist_use < 0)
18778 return dist_define > LEA_MAX_STALL;
18780 /* If this insn has both a backward non-agu dependence and a forward
18781 agu dependence, the one with the shorter distance takes effect. */
18782 return dist_define >= dist_use;
18785 /* Return true if it is legal to clobber flags by INSN and
18786 false otherwise. */
18788 static bool
18789 ix86_ok_to_clobber_flags (rtx_insn *insn)
18791 basic_block bb = BLOCK_FOR_INSN (insn);
18792 df_ref use;
18793 bitmap live;
18795 while (insn)
18797 if (NONDEBUG_INSN_P (insn))
18799 FOR_EACH_INSN_USE (use, insn)
18800 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18801 return false;
18803 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18804 return true;
18807 if (insn == BB_END (bb))
18808 break;
18810 insn = NEXT_INSN (insn);
18813 live = df_get_live_out (bb);
18814 return !REGNO_REG_SET_P (live, FLAGS_REG);
18817 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18818 move and add to avoid AGU stalls. */
18820 bool
18821 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18823 unsigned int regno0, regno1, regno2;
18825 /* Check if we need to optimize. */
18826 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18827 return false;
18829 /* Check it is correct to split here. */
18830 if (!ix86_ok_to_clobber_flags (insn))
18831 return false;
18833 regno0 = true_regnum (operands[0]);
18834 regno1 = true_regnum (operands[1]);
18835 regno2 = true_regnum (operands[2]);
18837 /* We need to split only adds with a non-destructive
18838 destination operand. */
18839 if (regno0 == regno1 || regno0 == regno2)
18840 return false;
18841 else
18842 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18845 /* Return true if we should emit an lea instruction instead of a mov
18846 instruction. */
18848 bool
18849 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18851 unsigned int regno0, regno1;
18853 /* Check if we need to optimize. */
18854 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18855 return false;
18857 /* Use lea for reg to reg moves only. */
18858 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18859 return false;
18861 regno0 = true_regnum (operands[0]);
18862 regno1 = true_regnum (operands[1]);
18864 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18867 /* Return true if we need to split lea into a sequence of
18868 instructions to avoid AGU stalls. */
18870 bool
18871 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18873 unsigned int regno0, regno1, regno2;
18874 int split_cost;
18875 struct ix86_address parts;
18876 int ok;
18878 /* Check if we need to optimize. */
18879 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18880 return false;
18882 /* The "at least two components" test below might not catch simple
18883 move or zero extension insns if parts.base is non-NULL and parts.disp
18884 is const0_rtx as the only components in the address, e.g. if the
18885 register is %rbp or %r13. As this test is much cheaper and moves or
18886 zero extensions are the common case, do this check first. */
18887 if (REG_P (operands[1])
18888 || (SImode_address_operand (operands[1], VOIDmode)
18889 && REG_P (XEXP (operands[1], 0))))
18890 return false;
18892 /* Check if it is OK to split here. */
18893 if (!ix86_ok_to_clobber_flags (insn))
18894 return false;
18896 ok = ix86_decompose_address (operands[1], &parts);
18897 gcc_assert (ok);
18899 /* There should be at least two components in the address. */
18900 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18901 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18902 return false;
18904 /* We should not split into add if a non-legitimate PIC
18905 operand is used as the displacement. */
18906 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18907 return false;
18909 regno0 = true_regnum (operands[0]);
18910 regno1 = INVALID_REGNUM;
18911 regno2 = INVALID_REGNUM;
18913 if (parts.base)
18914 regno1 = true_regnum (parts.base);
18915 if (parts.index)
18916 regno2 = true_regnum (parts.index);
18918 split_cost = 0;
18920 /* Compute how many cycles we will add to the execution time
18921 if we split the lea into a sequence of instructions. */
18922 if (parts.base || parts.index)
18924 /* Have to use a mov instruction if the non-destructive
18925 destination form is used. */
18926 if (regno1 != regno0 && regno2 != regno0)
18927 split_cost += 1;
18929 /* Have to add index to base if both exist. */
18930 if (parts.base && parts.index)
18931 split_cost += 1;
18933 /* Have to use shift and adds if scale is 2 or greater. */
18934 if (parts.scale > 1)
18936 if (regno0 != regno1)
18937 split_cost += 1;
18938 else if (regno2 == regno0)
18939 split_cost += 4;
18940 else
18941 split_cost += parts.scale;
18944 /* Have to use an add instruction with an immediate if
18945 disp is nonzero. */
18946 if (parts.disp && parts.disp != const0_rtx)
18947 split_cost += 1;
18949 /* Subtract the price of lea. */
18950 split_cost -= 1;
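/* Illustrative tally (an added example, not from the original source):
   for a hypothetical "lea 0x4(%rbx,%rcx,2), %rax" with %rax, %rbx and
   %rcx all distinct, the components above give 1 (mov of the index)
   + 1 (add of the base) + 1 (shift for scale 2) + 1 (add of the
   displacement) - 1 (the lea itself) = 3 extra cycles of latency for
   the split sequence.  */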
18953 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18954 parts.scale > 1);
18957 /* Emit the x86 binary operation CODE in mode MODE, where the first operand
18958 matches the destination. The RTX includes a clobber of FLAGS_REG. */
18960 static void
18961 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18962 rtx dst, rtx src)
18964 rtx op, clob;
18966 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
18967 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18969 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18972 /* Return true if the definition of REGNO1 is nearer to INSN than that of REGNO2. */
18974 static bool
18975 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18977 rtx_insn *prev = insn;
18978 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18980 if (insn == start)
18981 return false;
18982 while (prev && prev != start)
18984 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18986 prev = PREV_INSN (prev);
18987 continue;
18989 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18990 return true;
18991 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18992 return false;
18993 prev = PREV_INSN (prev);
18996 /* None of the regs is defined in the bb. */
18997 return false;
19000 /* Split lea instructions into a sequence of instructions
19001 which are executed on the ALU to avoid AGU stalls.
19002 It is assumed that it is allowed to clobber the flags register
19003 at the lea position. */
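/* Illustrative sketch (an added example, not from the original source):
   for a hypothetical "lea 0x4(%rbx,%rcx,2), %rax" with %rax, %rbx and
   %rcx all distinct, the splitting below would emit roughly
	mov %rcx, %rax		(copy the index)
	shl $1, %rax		(apply scale 2 as a shift)
	add %rbx, %rax		(add the base)
	add $4, %rax		(add the displacement)
   i.e. four ALU instructions in place of the single lea.  */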
19005 void
19006 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
19008 unsigned int regno0, regno1, regno2;
19009 struct ix86_address parts;
19010 rtx target, tmp;
19011 int ok, adds;
19013 ok = ix86_decompose_address (operands[1], &parts);
19014 gcc_assert (ok);
19016 target = gen_lowpart (mode, operands[0]);
19018 regno0 = true_regnum (target);
19019 regno1 = INVALID_REGNUM;
19020 regno2 = INVALID_REGNUM;
19022 if (parts.base)
19024 parts.base = gen_lowpart (mode, parts.base);
19025 regno1 = true_regnum (parts.base);
19028 if (parts.index)
19030 parts.index = gen_lowpart (mode, parts.index);
19031 regno2 = true_regnum (parts.index);
19034 if (parts.disp)
19035 parts.disp = gen_lowpart (mode, parts.disp);
19037 if (parts.scale > 1)
19039 /* Case r1 = r1 + ... */
19040 if (regno1 == regno0)
19042 /* If we have the case r1 = r1 + C * r2 then we
19043 would have to use multiplication, which is very
19044 expensive. Assume the cost model is wrong if we
19045 reach such a case here. */
19046 gcc_assert (regno2 != regno0);
19048 for (adds = parts.scale; adds > 0; adds--)
19049 ix86_emit_binop (PLUS, mode, target, parts.index);
19051 else
19053 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
19054 if (regno0 != regno2)
19055 emit_insn (gen_rtx_SET (target, parts.index));
19057 /* Use shift for scaling. */
19058 ix86_emit_binop (ASHIFT, mode, target,
19059 GEN_INT (exact_log2 (parts.scale)));
19061 if (parts.base)
19062 ix86_emit_binop (PLUS, mode, target, parts.base);
19064 if (parts.disp && parts.disp != const0_rtx)
19065 ix86_emit_binop (PLUS, mode, target, parts.disp);
19068 else if (!parts.base && !parts.index)
19070 gcc_assert (parts.disp);
19071 emit_insn (gen_rtx_SET (target, parts.disp));
19073 else
19075 if (!parts.base)
19077 if (regno0 != regno2)
19078 emit_insn (gen_rtx_SET (target, parts.index));
19080 else if (!parts.index)
19082 if (regno0 != regno1)
19083 emit_insn (gen_rtx_SET (target, parts.base));
19085 else
19087 if (regno0 == regno1)
19088 tmp = parts.index;
19089 else if (regno0 == regno2)
19090 tmp = parts.base;
19091 else
19093 rtx tmp1;
19095 /* Find better operand for SET instruction, depending
19096 on which definition is farther from the insn. */
19097 if (find_nearest_reg_def (insn, regno1, regno2))
19098 tmp = parts.index, tmp1 = parts.base;
19099 else
19100 tmp = parts.base, tmp1 = parts.index;
19102 emit_insn (gen_rtx_SET (target, tmp));
19104 if (parts.disp && parts.disp != const0_rtx)
19105 ix86_emit_binop (PLUS, mode, target, parts.disp);
19107 ix86_emit_binop (PLUS, mode, target, tmp1);
19108 return;
19111 ix86_emit_binop (PLUS, mode, target, tmp);
19114 if (parts.disp && parts.disp != const0_rtx)
19115 ix86_emit_binop (PLUS, mode, target, parts.disp);
19119 /* Return true if it is OK to optimize an ADD operation to an LEA
19120 operation to avoid flag register consumption. For most processors,
19121 ADD is faster than LEA. For processors like BONNELL, if the
19122 destination register of the LEA holds an actual address which will be
19123 used soon, LEA is better; otherwise ADD is better. */
19125 bool
19126 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19128 unsigned int regno0 = true_regnum (operands[0]);
19129 unsigned int regno1 = true_regnum (operands[1]);
19130 unsigned int regno2 = true_regnum (operands[2]);
19132 /* If a = b + c, (a != b && a != c), we must use the lea form. */
19133 if (regno0 != regno1 && regno0 != regno2)
19134 return true;
19136 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19137 return false;
19139 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19142 /* Return true if destination reg of SET_BODY is shift count of
19143 USE_BODY. */
19145 static bool
19146 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19148 rtx set_dest;
19149 rtx shift_rtx;
19150 int i;
19152 /* Retrieve destination of SET_BODY. */
19153 switch (GET_CODE (set_body))
19155 case SET:
19156 set_dest = SET_DEST (set_body);
19157 if (!set_dest || !REG_P (set_dest))
19158 return false;
19159 break;
19160 case PARALLEL:
19161 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19162 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19163 use_body))
19164 return true;
19165 default:
19166 return false;
19167 break;
19170 /* Retrieve shift count of USE_BODY. */
19171 switch (GET_CODE (use_body))
19173 case SET:
19174 shift_rtx = XEXP (use_body, 1);
19175 break;
19176 case PARALLEL:
19177 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19178 if (ix86_dep_by_shift_count_body (set_body,
19179 XVECEXP (use_body, 0, i)))
19180 return true;
19181 default:
19182 return false;
19183 break;
19186 if (shift_rtx
19187 && (GET_CODE (shift_rtx) == ASHIFT
19188 || GET_CODE (shift_rtx) == LSHIFTRT
19189 || GET_CODE (shift_rtx) == ASHIFTRT
19190 || GET_CODE (shift_rtx) == ROTATE
19191 || GET_CODE (shift_rtx) == ROTATERT))
19193 rtx shift_count = XEXP (shift_rtx, 1);
19195 /* Return true if shift count is dest of SET_BODY. */
19196 if (REG_P (shift_count))
19198 /* Add this check since it can be invoked before register
19199 allocation by the pre-reload scheduler. */
19200 if (reload_completed
19201 && true_regnum (set_dest) == true_regnum (shift_count))
19202 return true;
19203 else if (REGNO (set_dest) == REGNO (shift_count))
19204 return true;
19208 return false;
19211 /* Return true if destination reg of SET_INSN is shift count of
19212 USE_INSN. */
19214 bool
19215 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19217 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19218 PATTERN (use_insn));
19221 /* Return TRUE or FALSE depending on whether the unary operator meets the
19222 appropriate constraints. */
19224 bool
19225 ix86_unary_operator_ok (enum rtx_code,
19226 machine_mode,
19227 rtx operands[2])
19229 /* If one of the operands is a memory operand, the source and destination must match. */
19230 if ((MEM_P (operands[0])
19231 || MEM_P (operands[1]))
19232 && ! rtx_equal_p (operands[0], operands[1]))
19233 return false;
19234 return true;
19237 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19238 are ok, keeping in mind the possible movddup alternative. */
19240 bool
19241 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19243 if (MEM_P (operands[0]))
19244 return rtx_equal_p (operands[0], operands[1 + high]);
19245 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19246 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19247 return true;
19250 /* Post-reload splitter for converting an SF or DFmode value in an
19251 SSE register into an unsigned SImode. */
19253 void
19254 ix86_split_convert_uns_si_sse (rtx operands[])
19256 machine_mode vecmode;
19257 rtx value, large, zero_or_two31, input, two31, x;
19259 large = operands[1];
19260 zero_or_two31 = operands[2];
19261 input = operands[3];
19262 two31 = operands[4];
19263 vecmode = GET_MODE (large);
19264 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19266 /* Load up the value into the low element. We must ensure that the other
19267 elements are valid floats -- zero is the easiest such value. */
19268 if (MEM_P (input))
19270 if (vecmode == V4SFmode)
19271 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19272 else
19273 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19275 else
19277 input = gen_rtx_REG (vecmode, REGNO (input));
19278 emit_move_insn (value, CONST0_RTX (vecmode));
19279 if (vecmode == V4SFmode)
19280 emit_insn (gen_sse_movss (value, value, input));
19281 else
19282 emit_insn (gen_sse2_movsd (value, value, input));
19285 emit_move_insn (large, two31);
19286 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19288 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19289 emit_insn (gen_rtx_SET (large, x));
19291 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19292 emit_insn (gen_rtx_SET (zero_or_two31, x));
19294 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19295 emit_insn (gen_rtx_SET (value, x));
19297 large = gen_rtx_REG (V4SImode, REGNO (large));
19298 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19300 x = gen_rtx_REG (V4SImode, REGNO (value));
19301 if (vecmode == V4SFmode)
19302 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19303 else
19304 emit_insn (gen_sse2_cvttpd2dq (x, value));
19305 value = x;
19307 emit_insn (gen_xorv4si3 (value, value, large));
19310 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19311 Expects the 64-bit DImode to be supplied in a pair of integral
19312 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19313 -mfpmath=sse, !optimize_size only. */
19315 void
19316 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19318 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19319 rtx int_xmm, fp_xmm;
19320 rtx biases, exponents;
19321 rtx x;
19323 int_xmm = gen_reg_rtx (V4SImode);
19324 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19325 emit_insn (gen_movdi_to_sse (int_xmm, input));
19326 else if (TARGET_SSE_SPLIT_REGS)
19328 emit_clobber (int_xmm);
19329 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19331 else
19333 x = gen_reg_rtx (V2DImode);
19334 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19335 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19338 x = gen_rtx_CONST_VECTOR (V4SImode,
19339 gen_rtvec (4, GEN_INT (0x43300000UL),
19340 GEN_INT (0x45300000UL),
19341 const0_rtx, const0_rtx));
19342 exponents = validize_mem (force_const_mem (V4SImode, x));
19344 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19345 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19347 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19348 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19349 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19350 (0x1.0p84 + double(fp_value_hi_xmm)).
19351 Note these exponents differ by 32. */
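/* Worked example (an added illustration, not from the original source):
   for INPUT = 0x0000000100000002, the element built from the low 32 bits
   is 0x1.0p52 + 2 and the element built from the high 32 bits is
   0x1.0p84 + 0x1.0p32; subtracting the biases below and adding the two
   halves yields 1 * 2^32 + 2 = 4294967298.0.  */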
19353 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19355 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19356 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19357 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19358 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19359 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19360 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19361 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19362 biases = validize_mem (force_const_mem (V2DFmode, biases));
19363 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19365 /* Add the upper and lower DFmode values together. */
19366 if (TARGET_SSE3)
19367 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19368 else
19370 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19371 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19372 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19375 ix86_expand_vector_extract (false, target, fp_xmm, 0);
19378 /* Not used, but eases macroization of patterns. */
19379 void
19380 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19382 gcc_unreachable ();
19385 /* Convert an unsigned SImode value into a DFmode. Only currently used
19386 for SSE, but applicable anywhere. */
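/* Worked example (an added illustration, not from the original source):
   for INPUT = 0xffffffff (4294967295), adding -2147483648 with modulo
   wrap-around gives the signed value 2147483647; converting that to
   DFmode and adding 0x1.0p31 back yields 4294967295.0, the original
   unsigned value.  */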
19388 void
19389 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19391 REAL_VALUE_TYPE TWO31r;
19392 rtx x, fp;
19394 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19395 NULL, 1, OPTAB_DIRECT);
19397 fp = gen_reg_rtx (DFmode);
19398 emit_insn (gen_floatsidf2 (fp, x));
19400 real_ldexp (&TWO31r, &dconst1, 31);
19401 x = const_double_from_real_value (TWO31r, DFmode);
19403 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19404 if (x != target)
19405 emit_move_insn (target, x);
19408 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19409 32-bit mode; otherwise we have a direct convert instruction. */
19411 void
19412 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19414 REAL_VALUE_TYPE TWO32r;
19415 rtx fp_lo, fp_hi, x;
19417 fp_lo = gen_reg_rtx (DFmode);
19418 fp_hi = gen_reg_rtx (DFmode);
19420 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19422 real_ldexp (&TWO32r, &dconst1, 32);
19423 x = const_double_from_real_value (TWO32r, DFmode);
19424 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19426 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19428 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19429 0, OPTAB_DIRECT);
19430 if (x != target)
19431 emit_move_insn (target, x);
19434 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19435 For x86_32, -mfpmath=sse, !optimize_size only. */
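/* Worked example (an added illustration, not from the original source):
   for INPUT = 0x12345678 the low half is 0x5678 = 22136 and the high
   half is 0x1234 = 4660; the exact sum 4660 * 65536 + 22136 equals
   305419896, and the final SFmode addition rounds it to the nearest
   representable float, just as a direct conversion would.  */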
19436 void
19437 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19439 REAL_VALUE_TYPE ONE16r;
19440 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19442 real_ldexp (&ONE16r, &dconst1, 16);
19443 x = const_double_from_real_value (ONE16r, SFmode);
19444 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
19445 NULL, 0, OPTAB_DIRECT);
19446 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
19447 NULL, 0, OPTAB_DIRECT);
19448 fp_hi = gen_reg_rtx (SFmode);
19449 fp_lo = gen_reg_rtx (SFmode);
19450 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19451 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19452 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19453 0, OPTAB_DIRECT);
19454 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19455 0, OPTAB_DIRECT);
19456 if (!rtx_equal_p (target, fp_hi))
19457 emit_move_insn (target, fp_hi);
19460 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19461 a vector of unsigned ints VAL to vector of floats TARGET. */
19463 void
19464 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19466 rtx tmp[8];
19467 REAL_VALUE_TYPE TWO16r;
19468 machine_mode intmode = GET_MODE (val);
19469 machine_mode fltmode = GET_MODE (target);
19470 rtx (*cvt) (rtx, rtx);
19472 if (intmode == V4SImode)
19473 cvt = gen_floatv4siv4sf2;
19474 else
19475 cvt = gen_floatv8siv8sf2;
19476 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19477 tmp[0] = force_reg (intmode, tmp[0]);
19478 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19479 OPTAB_DIRECT);
19480 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19481 NULL_RTX, 1, OPTAB_DIRECT);
19482 tmp[3] = gen_reg_rtx (fltmode);
19483 emit_insn (cvt (tmp[3], tmp[1]));
19484 tmp[4] = gen_reg_rtx (fltmode);
19485 emit_insn (cvt (tmp[4], tmp[2]));
19486 real_ldexp (&TWO16r, &dconst1, 16);
19487 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19488 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19489 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19490 OPTAB_DIRECT);
19491 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19492 OPTAB_DIRECT);
19493 if (tmp[7] != target)
19494 emit_move_insn (target, tmp[7]);
19497 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19498 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19499 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19500 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
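/* Worked example (an added illustration, not from the original source):
   for a VAL element of 3221225472.0 (0x1.8p31), the element is >= 0x1p31,
   so 0x1p31 is subtracted giving 1073741824.0; the signed truncation then
   produces 0x40000000, and xoring with the 0x80000000 bit supplied via
   *XORP restores 0xc0000000 = 3221225472.  */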
19503 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19505 REAL_VALUE_TYPE TWO31r;
19506 rtx two31r, tmp[4];
19507 machine_mode mode = GET_MODE (val);
19508 machine_mode scalarmode = GET_MODE_INNER (mode);
19509 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19510 rtx (*cmp) (rtx, rtx, rtx, rtx);
19511 int i;
19513 for (i = 0; i < 3; i++)
19514 tmp[i] = gen_reg_rtx (mode);
19515 real_ldexp (&TWO31r, &dconst1, 31);
19516 two31r = const_double_from_real_value (TWO31r, scalarmode);
19517 two31r = ix86_build_const_vector (mode, 1, two31r);
19518 two31r = force_reg (mode, two31r);
19519 switch (mode)
19521 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19522 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19523 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19524 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19525 default: gcc_unreachable ();
19527 tmp[3] = gen_rtx_LE (mode, two31r, val);
19528 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19529 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19530 0, OPTAB_DIRECT);
19531 if (intmode == V4SImode || TARGET_AVX2)
19532 *xorp = expand_simple_binop (intmode, ASHIFT,
19533 gen_lowpart (intmode, tmp[0]),
19534 GEN_INT (31), NULL_RTX, 0,
19535 OPTAB_DIRECT);
19536 else
19538 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
19539 two31 = ix86_build_const_vector (intmode, 1, two31);
19540 *xorp = expand_simple_binop (intmode, AND,
19541 gen_lowpart (intmode, tmp[0]),
19542 two31, NULL_RTX, 0,
19543 OPTAB_DIRECT);
19545 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19546 0, OPTAB_DIRECT);
19549 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19550 then replicate the value for all elements of the vector
19551 register. */
19554 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19556 int i, n_elt;
19557 rtvec v;
19558 machine_mode scalar_mode;
19560 switch (mode)
19562 case V64QImode:
19563 case V32QImode:
19564 case V16QImode:
19565 case V32HImode:
19566 case V16HImode:
19567 case V8HImode:
19568 case V16SImode:
19569 case V8SImode:
19570 case V4SImode:
19571 case V8DImode:
19572 case V4DImode:
19573 case V2DImode:
19574 gcc_assert (vect);
19575 case V16SFmode:
19576 case V8SFmode:
19577 case V4SFmode:
19578 case V8DFmode:
19579 case V4DFmode:
19580 case V2DFmode:
19581 n_elt = GET_MODE_NUNITS (mode);
19582 v = rtvec_alloc (n_elt);
19583 scalar_mode = GET_MODE_INNER (mode);
19585 RTVEC_ELT (v, 0) = value;
19587 for (i = 1; i < n_elt; ++i)
19588 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19590 return gen_rtx_CONST_VECTOR (mode, v);
19592 default:
19593 gcc_unreachable ();
19597 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19598 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19599 for an SSE register. If VECT is true, then replicate the mask for
19600 all elements of the vector register. If INVERT is true, then create
19601 a mask excluding the sign bit. */
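/* For instance (an added illustration, not from the original source):
   for V4SFmode with VECT true and INVERT false this yields a vector whose
   four elements carry only bit 31 (the 0x80000000 bit pattern viewed as a
   float); with INVERT true each element is the complementary 0x7fffffff
   pattern instead.  */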
19604 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19606 machine_mode vec_mode, imode;
19607 wide_int w;
19608 rtx mask, v;
19610 switch (mode)
19612 case V16SImode:
19613 case V16SFmode:
19614 case V8SImode:
19615 case V4SImode:
19616 case V8SFmode:
19617 case V4SFmode:
19618 vec_mode = mode;
19619 imode = SImode;
19620 break;
19622 case V8DImode:
19623 case V4DImode:
19624 case V2DImode:
19625 case V8DFmode:
19626 case V4DFmode:
19627 case V2DFmode:
19628 vec_mode = mode;
19629 imode = DImode;
19630 break;
19632 case TImode:
19633 case TFmode:
19634 vec_mode = VOIDmode;
19635 imode = TImode;
19636 break;
19638 default:
19639 gcc_unreachable ();
19642 machine_mode inner_mode = GET_MODE_INNER (mode);
19643 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
19644 GET_MODE_BITSIZE (inner_mode));
19645 if (invert)
19646 w = wi::bit_not (w);
19648 /* Force this value into the low part of a fp vector constant. */
19649 mask = immed_wide_int_const (w, imode);
19650 mask = gen_lowpart (inner_mode, mask);
19652 if (vec_mode == VOIDmode)
19653 return force_reg (inner_mode, mask);
19655 v = ix86_build_const_vector (vec_mode, vect, mask);
19656 return force_reg (vec_mode, v);
19659 /* Generate code for floating point ABS or NEG. */
19661 void
19662 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19663 rtx operands[])
19665 rtx mask, set, dst, src;
19666 bool use_sse = false;
19667 bool vector_mode = VECTOR_MODE_P (mode);
19668 machine_mode vmode = mode;
19670 if (vector_mode)
19671 use_sse = true;
19672 else if (mode == TFmode)
19673 use_sse = true;
19674 else if (TARGET_SSE_MATH)
19676 use_sse = SSE_FLOAT_MODE_P (mode);
19677 if (mode == SFmode)
19678 vmode = V4SFmode;
19679 else if (mode == DFmode)
19680 vmode = V2DFmode;
19683 /* NEG and ABS performed with SSE use bitwise mask operations.
19684 Create the appropriate mask now. */
19685 if (use_sse)
19686 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19687 else
19688 mask = NULL_RTX;
19690 dst = operands[0];
19691 src = operands[1];
19693 set = gen_rtx_fmt_e (code, mode, src);
19694 set = gen_rtx_SET (dst, set);
19696 if (mask)
19698 rtx use, clob;
19699 rtvec par;
19701 use = gen_rtx_USE (VOIDmode, mask);
19702 if (vector_mode)
19703 par = gen_rtvec (2, set, use);
19704 else
19706 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19707 par = gen_rtvec (3, set, use, clob);
19709 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19711 else
19712 emit_insn (set);
19715 /* Expand a copysign operation. Special case operand 0 being a constant. */
19717 void
19718 ix86_expand_copysign (rtx operands[])
19720 machine_mode mode, vmode;
19721 rtx dest, op0, op1, mask, nmask;
19723 dest = operands[0];
19724 op0 = operands[1];
19725 op1 = operands[2];
19727 mode = GET_MODE (dest);
19729 if (mode == SFmode)
19730 vmode = V4SFmode;
19731 else if (mode == DFmode)
19732 vmode = V2DFmode;
19733 else
19734 vmode = mode;
19736 if (CONST_DOUBLE_P (op0))
19738 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19740 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19741 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19743 if (mode == SFmode || mode == DFmode)
19745 if (op0 == CONST0_RTX (mode))
19746 op0 = CONST0_RTX (vmode);
19747 else
19749 rtx v = ix86_build_const_vector (vmode, false, op0);
19751 op0 = force_reg (vmode, v);
19754 else if (op0 != CONST0_RTX (mode))
19755 op0 = force_reg (mode, op0);
19757 mask = ix86_build_signbit_mask (vmode, 0, 0);
19759 if (mode == SFmode)
19760 copysign_insn = gen_copysignsf3_const;
19761 else if (mode == DFmode)
19762 copysign_insn = gen_copysigndf3_const;
19763 else
19764 copysign_insn = gen_copysigntf3_const;
19766 emit_insn (copysign_insn (dest, op0, op1, mask));
19768 else
19770 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19772 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19773 mask = ix86_build_signbit_mask (vmode, 0, 0);
19775 if (mode == SFmode)
19776 copysign_insn = gen_copysignsf3_var;
19777 else if (mode == DFmode)
19778 copysign_insn = gen_copysigndf3_var;
19779 else
19780 copysign_insn = gen_copysigntf3_var;
19782 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19786 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19787 be a constant, and so has already been expanded into a vector constant. */
19789 void
19790 ix86_split_copysign_const (rtx operands[])
19792 machine_mode mode, vmode;
19793 rtx dest, op0, mask, x;
19795 dest = operands[0];
19796 op0 = operands[1];
19797 mask = operands[3];
19799 mode = GET_MODE (dest);
19800 vmode = GET_MODE (mask);
19802 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19803 x = gen_rtx_AND (vmode, dest, mask);
19804 emit_insn (gen_rtx_SET (dest, x));
19806 if (op0 != CONST0_RTX (vmode))
19808 x = gen_rtx_IOR (vmode, dest, op0);
19809 emit_insn (gen_rtx_SET (dest, x));
19813 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19814 so we have to do two masks. */
19816 void
19817 ix86_split_copysign_var (rtx operands[])
19819 machine_mode mode, vmode;
19820 rtx dest, scratch, op0, op1, mask, nmask, x;
19822 dest = operands[0];
19823 scratch = operands[1];
19824 op0 = operands[2];
19825 op1 = operands[3];
19826 nmask = operands[4];
19827 mask = operands[5];
19829 mode = GET_MODE (dest);
19830 vmode = GET_MODE (mask);
19832 if (rtx_equal_p (op0, op1))
19834 /* Shouldn't happen often (it's useless, obviously), but when it does
19835 we'd generate incorrect code if we continue below. */
19836 emit_move_insn (dest, op0);
19837 return;
19840 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19842 gcc_assert (REGNO (op1) == REGNO (scratch));
19844 x = gen_rtx_AND (vmode, scratch, mask);
19845 emit_insn (gen_rtx_SET (scratch, x));
19847 dest = mask;
19848 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19849 x = gen_rtx_NOT (vmode, dest);
19850 x = gen_rtx_AND (vmode, x, op0);
19851 emit_insn (gen_rtx_SET (dest, x));
19853 else
19855 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19857 x = gen_rtx_AND (vmode, scratch, mask);
19859 else /* alternative 2,4 */
19861 gcc_assert (REGNO (mask) == REGNO (scratch));
19862 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19863 x = gen_rtx_AND (vmode, scratch, op1);
19865 emit_insn (gen_rtx_SET (scratch, x));
19867 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19869 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19870 x = gen_rtx_AND (vmode, dest, nmask);
19872 else /* alternative 3,4 */
19874 gcc_assert (REGNO (nmask) == REGNO (dest));
19875 dest = nmask;
19876 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19877 x = gen_rtx_AND (vmode, dest, op0);
19879 emit_insn (gen_rtx_SET (dest, x));
19882 x = gen_rtx_IOR (vmode, dest, scratch);
19883 emit_insn (gen_rtx_SET (dest, x));
19886 /* Return TRUE or FALSE depending on whether the first SET in INSN
19887 has a source and destination with matching CC modes and whether the
19888 CC mode is at least as constrained as REQ_MODE. */
19890 bool
19891 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19893 rtx set;
19894 machine_mode set_mode;
19896 set = PATTERN (insn);
19897 if (GET_CODE (set) == PARALLEL)
19898 set = XVECEXP (set, 0, 0);
19899 gcc_assert (GET_CODE (set) == SET);
19900 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19902 set_mode = GET_MODE (SET_DEST (set));
19903 switch (set_mode)
19905 case CCNOmode:
19906 if (req_mode != CCNOmode
19907 && (req_mode != CCmode
19908 || XEXP (SET_SRC (set), 1) != const0_rtx))
19909 return false;
19910 break;
19911 case CCmode:
19912 if (req_mode == CCGCmode)
19913 return false;
19914 /* FALLTHRU */
19915 case CCGCmode:
19916 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19917 return false;
19918 /* FALLTHRU */
19919 case CCGOCmode:
19920 if (req_mode == CCZmode)
19921 return false;
19922 /* FALLTHRU */
19923 case CCZmode:
19924 break;
19926 case CCAmode:
19927 case CCCmode:
19928 case CCOmode:
19929 case CCPmode:
19930 case CCSmode:
19931 if (set_mode != req_mode)
19932 return false;
19933 break;
19935 default:
19936 gcc_unreachable ();
19939 return GET_MODE (SET_SRC (set)) == set_mode;
19942 /* Generate insn patterns to do an integer compare of OPERANDS. */
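/* As an illustration (an added example, not from the original source):
   for CODE == EQ this emits (set (reg:CCZ FLAGS_REG) (compare:CCZ op0 op1))
   and returns (eq (reg:CCZ FLAGS_REG) (const_int 0)) for the flags user to
   consume.  */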
19944 static rtx
19945 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19947 machine_mode cmpmode;
19948 rtx tmp, flags;
19950 cmpmode = SELECT_CC_MODE (code, op0, op1);
19951 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19953 /* This is very simple, but making the interface the same as in the
19954 FP case makes the rest of the code easier. */
19955 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19956 emit_insn (gen_rtx_SET (flags, tmp));
19958 /* Return the test that should be put into the flags user, i.e.
19959 the bcc, scc, or cmov instruction. */
19960 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19963 /* Figure out whether to use ordered or unordered fp comparisons.
19964 Return the appropriate mode to use. */
19966 machine_mode
19967 ix86_fp_compare_mode (enum rtx_code)
19969 /* ??? In order to make all comparisons reversible, we do all comparisons
19970 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19971 all forms of trapping and nontrapping comparisons, we can make inequality
19972 comparisons trapping again, since it results in better code when using
19973 FCOM based compares. */
19974 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19977 machine_mode
19978 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19980 machine_mode mode = GET_MODE (op0);
19982 if (SCALAR_FLOAT_MODE_P (mode))
19984 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19985 return ix86_fp_compare_mode (code);
19988 switch (code)
19990 /* Only zero flag is needed. */
19991 case EQ: /* ZF=0 */
19992 case NE: /* ZF!=0 */
19993 return CCZmode;
19994 /* Codes needing carry flag. */
19995 case GEU: /* CF=0 */
19996 case LTU: /* CF=1 */
19997 /* Detect overflow checks. They need just the carry flag. */
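/* For example (an added illustration, not from the original source): the
   unsigned overflow test "x + y < x" arrives here as LTU with OP0 being
   (plus x y) and OP1 being x; the comparison is true exactly when the
   addition produced a carry, so CCCmode suffices.  */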
19998 if (GET_CODE (op0) == PLUS
19999 && rtx_equal_p (op1, XEXP (op0, 0)))
20000 return CCCmode;
20001 else
20002 return CCmode;
20003 case GTU: /* CF=0 & ZF=0 */
20004 case LEU: /* CF=1 | ZF=1 */
20005 return CCmode;
20006 /* Codes possibly doable only with sign flag when
20007 comparing against zero. */
20008 case GE: /* SF=OF or SF=0 */
20009 case LT: /* SF<>OF or SF=1 */
20010 if (op1 == const0_rtx)
20011 return CCGOCmode;
20012 else
20013 /* For other cases Carry flag is not required. */
20014 return CCGCmode;
20015 /* Codes doable only with the sign flag when comparing
20016 against zero, but we miss the jump instruction for it,
20017 so we need to use relational tests against the overflow flag,
20018 which thus needs to be zero. */
20019 case GT: /* ZF=0 & SF=OF */
20020 case LE: /* ZF=1 | SF<>OF */
20021 if (op1 == const0_rtx)
20022 return CCNOmode;
20023 else
20024 return CCGCmode;
20025 /* The strcmp pattern does (use flags) and combine may ask us for the proper
20026 mode. */
20027 case USE:
20028 return CCmode;
20029 default:
20030 gcc_unreachable ();
20034 /* Return the fixed registers used for condition codes. */
20036 static bool
20037 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
20039 *p1 = FLAGS_REG;
20040 *p2 = FPSR_REG;
20041 return true;
20044 /* If two condition code modes are compatible, return a condition code
20045 mode which is compatible with both. Otherwise, return
20046 VOIDmode. */
20048 static machine_mode
20049 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
20051 if (m1 == m2)
20052 return m1;
20054 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
20055 return VOIDmode;
20057 if ((m1 == CCGCmode && m2 == CCGOCmode)
20058 || (m1 == CCGOCmode && m2 == CCGCmode))
20059 return CCGCmode;
20061 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
20062 return m2;
20063 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
20064 return m1;
20066 switch (m1)
20068 default:
20069 gcc_unreachable ();
20071 case CCmode:
20072 case CCGCmode:
20073 case CCGOCmode:
20074 case CCNOmode:
20075 case CCAmode:
20076 case CCCmode:
20077 case CCOmode:
20078 case CCPmode:
20079 case CCSmode:
20080 case CCZmode:
20081 switch (m2)
20083 default:
20084 return VOIDmode;
20086 case CCmode:
20087 case CCGCmode:
20088 case CCGOCmode:
20089 case CCNOmode:
20090 case CCAmode:
20091 case CCCmode:
20092 case CCOmode:
20093 case CCPmode:
20094 case CCSmode:
20095 case CCZmode:
20096 return CCmode;
20099 case CCFPmode:
20100 case CCFPUmode:
20101 /* These are only compatible with themselves, which we already
20102 checked above. */
20103 return VOIDmode;
20108 /* Return a comparison we can do that is equivalent to
20109 swap_condition (code), apart possibly from orderedness.
20110 But never change the orderedness if TARGET_IEEE_FP, returning
20111 UNKNOWN in that case if necessary. */
20113 static enum rtx_code
20114 ix86_fp_swap_condition (enum rtx_code code)
20116 switch (code)
20118 case GT: /* GTU - CF=0 & ZF=0 */
20119 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20120 case GE: /* GEU - CF=0 */
20121 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20122 case UNLT: /* LTU - CF=1 */
20123 return TARGET_IEEE_FP ? UNKNOWN : GT;
20124 case UNLE: /* LEU - CF=1 | ZF=1 */
20125 return TARGET_IEEE_FP ? UNKNOWN : GE;
20126 default:
20127 return swap_condition (code);
20131 /* Return the cost of comparison CODE using the best strategy for performance.
20132 All following functions use the number of instructions as the cost metric.
20133 In the future this should be tweaked to compute bytes for optimize_size and
20134 take into account the performance of various instructions on various CPUs. */
20136 static int
20137 ix86_fp_comparison_cost (enum rtx_code code)
20139 int arith_cost;
20141 /* The cost of code using bit-twiddling on %ah. */
20142 switch (code)
20144 case UNLE:
20145 case UNLT:
20146 case LTGT:
20147 case GT:
20148 case GE:
20149 case UNORDERED:
20150 case ORDERED:
20151 case UNEQ:
20152 arith_cost = 4;
20153 break;
20154 case LT:
20155 case NE:
20156 case EQ:
20157 case UNGE:
20158 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20159 break;
20160 case LE:
20161 case UNGT:
20162 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20163 break;
20164 default:
20165 gcc_unreachable ();
20168 switch (ix86_fp_comparison_strategy (code))
20170 case IX86_FPCMP_COMI:
20171 return arith_cost > 4 ? 3 : 2;
20172 case IX86_FPCMP_SAHF:
20173 return arith_cost > 4 ? 4 : 3;
20174 default:
20175 return arith_cost;
20179 /* Return the strategy to use for floating-point comparisons. We assume that
20180 fcomi is always preferable where available, since that is also true when
20181 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20183 enum ix86_fpcmp_strategy
20184 ix86_fp_comparison_strategy (enum rtx_code)
20186 /* Do fcomi/sahf based test when profitable. */
20188 if (TARGET_CMOVE)
20189 return IX86_FPCMP_COMI;
20191 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20192 return IX86_FPCMP_SAHF;
20194 return IX86_FPCMP_ARITH;
20197 /* Swap, force into registers, or otherwise massage the two operands
20198 to a fp comparison. The operands are updated in place; the new
20199 comparison code is returned. */
20201 static enum rtx_code
20202 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20204 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20205 rtx op0 = *pop0, op1 = *pop1;
20206 machine_mode op_mode = GET_MODE (op0);
20207 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20209 /* All of the unordered compare instructions only work on registers.
20210 The same is true of the fcomi compare instructions. The XFmode
20211 compare instructions require registers except when comparing
20212 against zero or when converting operand 1 from fixed point to
20213 floating point. */
20215 if (!is_sse
20216 && (fpcmp_mode == CCFPUmode
20217 || (op_mode == XFmode
20218 && ! (standard_80387_constant_p (op0) == 1
20219 || standard_80387_constant_p (op1) == 1)
20220 && GET_CODE (op1) != FLOAT)
20221 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20223 op0 = force_reg (op_mode, op0);
20224 op1 = force_reg (op_mode, op1);
20226 else
20228 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20229 things around if they appear profitable, otherwise force op0
20230 into a register. */
20232 if (standard_80387_constant_p (op0) == 0
20233 || (MEM_P (op0)
20234 && ! (standard_80387_constant_p (op1) == 0
20235 || MEM_P (op1))))
20237 enum rtx_code new_code = ix86_fp_swap_condition (code);
20238 if (new_code != UNKNOWN)
20240 std::swap (op0, op1);
20241 code = new_code;
20245 if (!REG_P (op0))
20246 op0 = force_reg (op_mode, op0);
20248 if (CONSTANT_P (op1))
20250 int tmp = standard_80387_constant_p (op1);
20251 if (tmp == 0)
20252 op1 = validize_mem (force_const_mem (op_mode, op1));
20253 else if (tmp == 1)
20255 if (TARGET_CMOVE)
20256 op1 = force_reg (op_mode, op1);
20258 else
20259 op1 = force_reg (op_mode, op1);
20263 /* Try to rearrange the comparison to make it cheaper. */
20264 if (ix86_fp_comparison_cost (code)
20265 > ix86_fp_comparison_cost (swap_condition (code))
20266 && (REG_P (op1) || can_create_pseudo_p ()))
20268 std::swap (op0, op1);
20269 code = swap_condition (code);
20270 if (!REG_P (op0))
20271 op0 = force_reg (op_mode, op0);
20274 *pop0 = op0;
20275 *pop1 = op1;
20276 return code;
20279 /* Convert the comparison codes we use to represent FP comparisons to the
20280 integer codes that will result in a proper branch. Return UNKNOWN if no such
20281 code is available. */
20283 enum rtx_code
20284 ix86_fp_compare_code_to_integer (enum rtx_code code)
20286 switch (code)
20288 case GT:
20289 return GTU;
20290 case GE:
20291 return GEU;
20292 case ORDERED:
20293 case UNORDERED:
20294 return code;
20295 break;
20296 case UNEQ:
20297 return EQ;
20298 break;
20299 case UNLT:
20300 return LTU;
20301 break;
20302 case UNLE:
20303 return LEU;
20304 break;
20305 case LTGT:
20306 return NE;
20307 break;
20308 default:
20309 return UNKNOWN;
20313 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20315 static rtx
20316 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20318 machine_mode fpcmp_mode, intcmp_mode;
20319 rtx tmp, tmp2;
20321 fpcmp_mode = ix86_fp_compare_mode (code);
20322 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20324 /* Do fcomi/sahf based test when profitable. */
20325 switch (ix86_fp_comparison_strategy (code))
20327 case IX86_FPCMP_COMI:
20328 intcmp_mode = fpcmp_mode;
20329 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20330 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20331 emit_insn (tmp);
20332 break;
20334 case IX86_FPCMP_SAHF:
20335 intcmp_mode = fpcmp_mode;
20336 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20337 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20339 if (!scratch)
20340 scratch = gen_reg_rtx (HImode);
20341 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20342 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20343 break;
20345 case IX86_FPCMP_ARITH:
20346 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20347 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20348 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20349 if (!scratch)
20350 scratch = gen_reg_rtx (HImode);
20351 emit_insn (gen_rtx_SET (scratch, tmp2));
20353 /* In the unordered case, we have to check C2 for NaN's, which
20354 doesn't happen to work out to anything nice combination-wise.
20355 So do some bit twiddling on the value we've got in AH to come
20356 up with an appropriate set of condition codes. */
20358 intcmp_mode = CCNOmode;
20359 switch (code)
20361 case GT:
20362 case UNGT:
20363 if (code == GT || !TARGET_IEEE_FP)
20365 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20366 code = EQ;
20368 else
20370 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20371 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20372 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20373 intcmp_mode = CCmode;
20374 code = GEU;
20376 break;
20377 case LT:
20378 case UNLT:
20379 if (code == LT && TARGET_IEEE_FP)
20381 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20382 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20383 intcmp_mode = CCmode;
20384 code = EQ;
20386 else
20388 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20389 code = NE;
20391 break;
20392 case GE:
20393 case UNGE:
20394 if (code == GE || !TARGET_IEEE_FP)
20396 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20397 code = EQ;
20399 else
20401 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20402 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20403 code = NE;
20405 break;
20406 case LE:
20407 case UNLE:
20408 if (code == LE && TARGET_IEEE_FP)
20410 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20411 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20412 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20413 intcmp_mode = CCmode;
20414 code = LTU;
20416 else
20418 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20419 code = NE;
20421 break;
20422 case EQ:
20423 case UNEQ:
20424 if (code == EQ && TARGET_IEEE_FP)
20426 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20427 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20428 intcmp_mode = CCmode;
20429 code = EQ;
20431 else
20433 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20434 code = NE;
20436 break;
20437 case NE:
20438 case LTGT:
20439 if (code == NE && TARGET_IEEE_FP)
20441 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20442 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20443 GEN_INT (0x40)));
20444 code = NE;
20446 else
20448 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20449 code = EQ;
20451 break;
20453 case UNORDERED:
20454 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20455 code = NE;
20456 break;
20457 case ORDERED:
20458 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20459 code = EQ;
20460 break;
20462 default:
20463 gcc_unreachable ();
20465 break;
20467 default:
20468 gcc_unreachable ();
20471 /* Return the test that should be put into the flags user, i.e.
20472 the bcc, scc, or cmov instruction. */
20473 return gen_rtx_fmt_ee (code, VOIDmode,
20474 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20475 const0_rtx);
20478 static rtx
20479 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20481 rtx ret;
20483 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20484 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20486 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20488 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20489 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20491 else
20492 ret = ix86_expand_int_compare (code, op0, op1);
20494 return ret;
20497 void
20498 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20500 machine_mode mode = GET_MODE (op0);
20501 rtx tmp;
20503 switch (mode)
20505 case SFmode:
20506 case DFmode:
20507 case XFmode:
20508 case QImode:
20509 case HImode:
20510 case SImode:
20511 simple:
20512 tmp = ix86_expand_compare (code, op0, op1);
20513 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20514 gen_rtx_LABEL_REF (VOIDmode, label),
20515 pc_rtx);
20516 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
20517 return;
20519 case DImode:
20520 if (TARGET_64BIT)
20521 goto simple;
20522 case TImode:
20523 /* Expand DImode branch into multiple compare+branch. */
20525 rtx lo[2], hi[2];
20526 rtx_code_label *label2;
20527 enum rtx_code code1, code2, code3;
20528 machine_mode submode;
20530 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20532 std::swap (op0, op1);
20533 code = swap_condition (code);
20536 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20537 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20539 submode = mode == DImode ? SImode : DImode;
20541 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20542 avoid two branches. This costs one extra insn, so disable when
20543 optimizing for size. */
20545 if ((code == EQ || code == NE)
20546 && (!optimize_insn_for_size_p ()
20547 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20549 rtx xor0, xor1;
20551 xor1 = hi[0];
20552 if (hi[1] != const0_rtx)
20553 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20554 NULL_RTX, 0, OPTAB_WIDEN);
20556 xor0 = lo[0];
20557 if (lo[1] != const0_rtx)
20558 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20559 NULL_RTX, 0, OPTAB_WIDEN);
20561 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20562 NULL_RTX, 0, OPTAB_WIDEN);
20564 ix86_expand_branch (code, tmp, const0_rtx, label);
20565 return;
20568 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20569 op1 is a constant and the low word is zero, then we can just
20570 examine the high word. Similarly for low word -1 and
20571 less-or-equal-than or greater-than. */
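/* For instance, the unsigned test a < 0x500000000 has lo(op1) == 0, so it
   reduces to hi(a) < 5; likewise a <= 0x4ffffffff has lo(op1) == -1 and
   reduces to hi(a) <= 4, which the recursive call below handles directly.  */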
20573 if (CONST_INT_P (hi[1]))
20574 switch (code)
20576 case LT: case LTU: case GE: case GEU:
20577 if (lo[1] == const0_rtx)
20579 ix86_expand_branch (code, hi[0], hi[1], label);
20580 return;
20582 break;
20583 case LE: case LEU: case GT: case GTU:
20584 if (lo[1] == constm1_rtx)
20586 ix86_expand_branch (code, hi[0], hi[1], label);
20587 return;
20589 break;
20590 default:
20591 break;
20594 /* Otherwise, we need two or three jumps. */
20596 label2 = gen_label_rtx ();
20598 code1 = code;
20599 code2 = swap_condition (code);
20600 code3 = unsigned_condition (code);
20602 switch (code)
20604 case LT: case GT: case LTU: case GTU:
20605 break;
20607 case LE: code1 = LT; code2 = GT; break;
20608 case GE: code1 = GT; code2 = LT; break;
20609 case LEU: code1 = LTU; code2 = GTU; break;
20610 case GEU: code1 = GTU; code2 = LTU; break;
20612 case EQ: code1 = UNKNOWN; code2 = NE; break;
20613 case NE: code2 = UNKNOWN; break;
20615 default:
20616 gcc_unreachable ();
20620 * a < b =>
20621 * if (hi(a) < hi(b)) goto true;
20622 * if (hi(a) > hi(b)) goto false;
20623 * if (lo(a) < lo(b)) goto true;
20624 * false:
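/* For example, a signed 64-bit a <= b on ia32 uses code1 = LT, code2 = GT
   and code3 = LEU: branch to LABEL when hi(a) < hi(b), skip to LABEL2
   (the not-taken path) when hi(a) > hi(b), and otherwise decide on the
   unsigned low-word test lo(a) <= lo(b).  */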
20627 if (code1 != UNKNOWN)
20628 ix86_expand_branch (code1, hi[0], hi[1], label);
20629 if (code2 != UNKNOWN)
20630 ix86_expand_branch (code2, hi[0], hi[1], label2);
20632 ix86_expand_branch (code3, lo[0], lo[1], label);
20634 if (code2 != UNKNOWN)
20635 emit_label (label2);
20636 return;
20639 default:
20640 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20641 goto simple;
20645 /* Split branch based on floating point condition. */
20646 void
20647 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20648 rtx target1, rtx target2, rtx tmp)
20650 rtx condition;
20651 rtx i;
20653 if (target2 != pc_rtx)
20655 std::swap (target1, target2);
20656 code = reverse_condition_maybe_unordered (code);
20659 condition = ix86_expand_fp_compare (code, op1, op2,
20660 tmp);
20662 i = emit_jump_insn (gen_rtx_SET
20663 (pc_rtx,
20664 gen_rtx_IF_THEN_ELSE (VOIDmode,
20665 condition, target1, target2)));
20666 if (split_branch_probability >= 0)
20667 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20670 void
20671 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20673 rtx ret;
20675 gcc_assert (GET_MODE (dest) == QImode);
20677 ret = ix86_expand_compare (code, op0, op1);
20678 PUT_MODE (ret, QImode);
20679 emit_insn (gen_rtx_SET (dest, ret));
20682 /* Expand comparison setting or clearing carry flag. Return true when
20683 successful and set pop for the operation. */
20684 static bool
20685 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20687 machine_mode mode =
20688 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20690 /* Do not handle double-mode compares that go through the special path. */
20691 if (mode == (TARGET_64BIT ? TImode : DImode))
20692 return false;
20694 if (SCALAR_FLOAT_MODE_P (mode))
20696 rtx compare_op;
20697 rtx_insn *compare_seq;
20699 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20701 /* Shortcut: the following common codes never translate
20702 into carry flag compares. */
20703 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20704 || code == ORDERED || code == UNORDERED)
20705 return false;
20707 /* These comparisons require zero flag; swap operands so they won't. */
20708 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20709 && !TARGET_IEEE_FP)
20711 std::swap (op0, op1);
20712 code = swap_condition (code);
20715 /* Try to expand the comparison and verify that we end up with
20716 carry flag based comparison. This fails to be true only when
20717 we decide to expand the comparison using arithmetic, which is not
20718 a common scenario. */
20719 start_sequence ();
20720 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20721 compare_seq = get_insns ();
20722 end_sequence ();
20724 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20725 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20726 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20727 else
20728 code = GET_CODE (compare_op);
20730 if (code != LTU && code != GEU)
20731 return false;
20733 emit_insn (compare_seq);
20734 *pop = compare_op;
20735 return true;
20738 if (!INTEGRAL_MODE_P (mode))
20739 return false;
20741 switch (code)
20743 case LTU:
20744 case GEU:
20745 break;
20747 /* Convert a==0 into (unsigned)a<1. */
20748 case EQ:
20749 case NE:
20750 if (op1 != const0_rtx)
20751 return false;
20752 op1 = const1_rtx;
20753 code = (code == EQ ? LTU : GEU);
20754 break;
20756 /* Convert a>b into b<a or a>=b+1. */
20757 case GTU:
20758 case LEU:
20759 if (CONST_INT_P (op1))
20761 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20762 /* Bail out on overflow. We still can swap operands but that
20763 would force loading of the constant into register. */
20764 if (op1 == const0_rtx
20765 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20766 return false;
20767 code = (code == GTU ? GEU : LTU);
20769 else
20771 std::swap (op0, op1);
20772 code = (code == GTU ? LTU : GEU);
20774 break;
20776 /* Convert a>=0 into (unsigned)a<0x80000000. */
20777 case LT:
20778 case GE:
20779 if (mode == DImode || op1 != const0_rtx)
20780 return false;
20781 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20782 code = (code == LT ? GEU : LTU);
20783 break;
20784 case LE:
20785 case GT:
20786 if (mode == DImode || op1 != constm1_rtx)
20787 return false;
20788 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20789 code = (code == LE ? GEU : LTU);
20790 break;
20792 default:
20793 return false;
20795 /* Swapping operands may cause constant to appear as first operand. */
20796 if (!nonimmediate_operand (op0, VOIDmode))
20798 if (!can_create_pseudo_p ())
20799 return false;
20800 op0 = force_reg (mode, op0);
20802 *pop = ix86_expand_compare (code, op0, op1);
20803 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20804 return true;
20807 bool
20808 ix86_expand_int_movcc (rtx operands[])
20810 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20811 rtx_insn *compare_seq;
20812 rtx compare_op;
20813 machine_mode mode = GET_MODE (operands[0]);
20814 bool sign_bit_compare_p = false;
20815 rtx op0 = XEXP (operands[1], 0);
20816 rtx op1 = XEXP (operands[1], 1);
20818 if (GET_MODE (op0) == TImode
20819 || (GET_MODE (op0) == DImode
20820 && !TARGET_64BIT))
20821 return false;
20823 start_sequence ();
20824 compare_op = ix86_expand_compare (code, op0, op1);
20825 compare_seq = get_insns ();
20826 end_sequence ();
20828 compare_code = GET_CODE (compare_op);
20830 if ((op1 == const0_rtx && (code == GE || code == LT))
20831 || (op1 == constm1_rtx && (code == GT || code == LE)))
20832 sign_bit_compare_p = true;
20834 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20835 HImode insns, we'd be swallowed in word prefix ops. */
20837 if ((mode != HImode || TARGET_FAST_PREFIX)
20838 && (mode != (TARGET_64BIT ? TImode : DImode))
20839 && CONST_INT_P (operands[2])
20840 && CONST_INT_P (operands[3]))
20842 rtx out = operands[0];
20843 HOST_WIDE_INT ct = INTVAL (operands[2]);
20844 HOST_WIDE_INT cf = INTVAL (operands[3]);
20845 HOST_WIDE_INT diff;
20847 diff = ct - cf;
20848 /* Sign bit compares are better done using shifts than we do by using
20849 sbb. */
20850 if (sign_bit_compare_p
20851 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20853 /* Detect overlap between destination and compare sources. */
20854 rtx tmp = out;
20856 if (!sign_bit_compare_p)
20858 rtx flags;
20859 bool fpcmp = false;
20861 compare_code = GET_CODE (compare_op);
20863 flags = XEXP (compare_op, 0);
20865 if (GET_MODE (flags) == CCFPmode
20866 || GET_MODE (flags) == CCFPUmode)
20868 fpcmp = true;
20869 compare_code
20870 = ix86_fp_compare_code_to_integer (compare_code);
20873 /* To simplify rest of code, restrict to the GEU case. */
20874 if (compare_code == LTU)
20876 std::swap (ct, cf);
20877 compare_code = reverse_condition (compare_code);
20878 code = reverse_condition (code);
20880 else
20882 if (fpcmp)
20883 PUT_CODE (compare_op,
20884 reverse_condition_maybe_unordered
20885 (GET_CODE (compare_op)));
20886 else
20887 PUT_CODE (compare_op,
20888 reverse_condition (GET_CODE (compare_op)));
20890 diff = ct - cf;
20892 if (reg_overlap_mentioned_p (out, op0)
20893 || reg_overlap_mentioned_p (out, op1))
20894 tmp = gen_reg_rtx (mode);
20896 if (mode == DImode)
20897 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20898 else
20899 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20900 flags, compare_op));
20902 else
20904 if (code == GT || code == GE)
20905 code = reverse_condition (code);
20906 else
20908 std::swap (ct, cf);
20909 diff = ct - cf;
20911 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20914 if (diff == 1)
20917 * cmpl op0,op1
20918 * sbbl dest,dest
20919 * [addl dest, ct]
20921 * Size 5 - 8.
20923 if (ct)
20924 tmp = expand_simple_binop (mode, PLUS,
20925 tmp, GEN_INT (ct),
20926 copy_rtx (tmp), 1, OPTAB_DIRECT);
20928 else if (cf == -1)
20931 * cmpl op0,op1
20932 * sbbl dest,dest
20933 * orl $ct, dest
20935 * Size 8.
20937 tmp = expand_simple_binop (mode, IOR,
20938 tmp, GEN_INT (ct),
20939 copy_rtx (tmp), 1, OPTAB_DIRECT);
20941 else if (diff == -1 && ct)
20944 * cmpl op0,op1
20945 * sbbl dest,dest
20946 * notl dest
20947 * [addl dest, cf]
20949 * Size 8 - 11.
20951 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20952 if (cf)
20953 tmp = expand_simple_binop (mode, PLUS,
20954 copy_rtx (tmp), GEN_INT (cf),
20955 copy_rtx (tmp), 1, OPTAB_DIRECT);
20957 else
20960 * cmpl op0,op1
20961 * sbbl dest,dest
20962 * [notl dest]
20963 * andl cf - ct, dest
20964 * [addl dest, ct]
20966 * Size 8 - 11.
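/* Worked example of the general sequence above with ct = 7 and cf = 2:
   sbb leaves dest = 0 or -1; and'ing with (cf - ct) = -5 leaves 0 or -5;
   adding 7 then yields 7 or 2, selecting between the two constants without
   a branch.  */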
20969 if (cf == 0)
20971 cf = ct;
20972 ct = 0;
20973 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20976 tmp = expand_simple_binop (mode, AND,
20977 copy_rtx (tmp),
20978 gen_int_mode (cf - ct, mode),
20979 copy_rtx (tmp), 1, OPTAB_DIRECT);
20980 if (ct)
20981 tmp = expand_simple_binop (mode, PLUS,
20982 copy_rtx (tmp), GEN_INT (ct),
20983 copy_rtx (tmp), 1, OPTAB_DIRECT);
20986 if (!rtx_equal_p (tmp, out))
20987 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20989 return true;
20992 if (diff < 0)
20994 machine_mode cmp_mode = GET_MODE (op0);
20995 enum rtx_code new_code;
20997 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20999 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21001 /* We may be reversing an unordered compare to a normal compare, which
21002 is not valid in general (we may convert a non-trapping condition
21003 into a trapping one); however, on i386 we currently emit all
21004 comparisons unordered. */
21005 new_code = reverse_condition_maybe_unordered (code);
21007 else
21008 new_code = ix86_reverse_condition (code, cmp_mode);
21009 if (new_code != UNKNOWN)
21011 std::swap (ct, cf);
21012 diff = -diff;
21013 code = new_code;
21017 compare_code = UNKNOWN;
21018 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
21019 && CONST_INT_P (op1))
21021 if (op1 == const0_rtx
21022 && (code == LT || code == GE))
21023 compare_code = code;
21024 else if (op1 == constm1_rtx)
21026 if (code == LE)
21027 compare_code = LT;
21028 else if (code == GT)
21029 compare_code = GE;
21033 /* Optimize dest = (op0 < 0) ? -1 : cf. */
21034 if (compare_code != UNKNOWN
21035 && GET_MODE (op0) == GET_MODE (out)
21036 && (cf == -1 || ct == -1))
21038 /* If lea code below could be used, only optimize
21039 if it results in a 2 insn sequence. */
21041 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
21042 || diff == 3 || diff == 5 || diff == 9)
21043 || (compare_code == LT && ct == -1)
21044 || (compare_code == GE && cf == -1))
21047 * notl op1 (if necessary)
21048 * sarl $31, op1
21049 * orl cf, op1
21051 if (ct != -1)
21053 cf = ct;
21054 ct = -1;
21055 code = reverse_condition (code);
21058 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21060 out = expand_simple_binop (mode, IOR,
21061 out, GEN_INT (cf),
21062 out, 1, OPTAB_DIRECT);
21063 if (out != operands[0])
21064 emit_move_insn (operands[0], out);
21066 return true;
21071 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21072 || diff == 3 || diff == 5 || diff == 9)
21073 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21074 && (mode != DImode
21075 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21078 * xorl dest,dest
21079 * cmpl op1,op2
21080 * setcc dest
21081 * lea cf(dest*(ct-cf)),dest
21083 * Size 14.
21085 * This also catches the degenerate setcc-only case.
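/* For instance, with ct = 5 and cf = 2 the difference is 3, so after
   setcc the 0/1 value in dest is scaled through an lea such as
   leal 2(%eax,%eax,2), %eax, giving 2 when the condition is false and
   5 when it is true.  */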
21088 rtx tmp;
21089 int nops;
21091 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21093 nops = 0;
21094 /* On x86_64 the lea instruction operates on Pmode, so we need
21095 to get the arithmetic done in the proper mode to match. */
21096 if (diff == 1)
21097 tmp = copy_rtx (out);
21098 else
21100 rtx out1;
21101 out1 = copy_rtx (out);
21102 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21103 nops++;
21104 if (diff & 1)
21106 tmp = gen_rtx_PLUS (mode, tmp, out1);
21107 nops++;
21110 if (cf != 0)
21112 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21113 nops++;
21115 if (!rtx_equal_p (tmp, out))
21117 if (nops == 1)
21118 out = force_operand (tmp, copy_rtx (out));
21119 else
21120 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
21122 if (!rtx_equal_p (out, operands[0]))
21123 emit_move_insn (operands[0], copy_rtx (out));
21125 return true;
21129 * General case: Jumpful:
21130 * xorl dest,dest cmpl op1, op2
21131 * cmpl op1, op2 movl ct, dest
21132 * setcc dest jcc 1f
21133 * decl dest movl cf, dest
21134 * andl (cf-ct),dest 1:
21135 * addl ct,dest
21137 * Size 20. Size 14.
21139 * This is reasonably steep, but branch mispredict costs are
21140 * high on modern cpus, so consider failing only if optimizing
21141 * for space.
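/* Tracing the branchless column with ct = 9 and cf = 4: setcc gives 1/0,
   decl gives 0/-1, andl with (cf - ct) = -5 gives 0/-5, and addl $9 gives
   9/4, i.e. ct when the condition holds and cf when it does not.  */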
21144 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21145 && BRANCH_COST (optimize_insn_for_speed_p (),
21146 false) >= 2)
21148 if (cf == 0)
21150 machine_mode cmp_mode = GET_MODE (op0);
21151 enum rtx_code new_code;
21153 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21155 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21157 /* We may be reversing an unordered compare to a normal compare,
21158 which is not valid in general (we may convert a non-trapping
21159 condition into a trapping one); however, on i386 we currently
21160 emit all comparisons unordered. */
21161 new_code = reverse_condition_maybe_unordered (code);
21163 else
21165 new_code = ix86_reverse_condition (code, cmp_mode);
21166 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21167 compare_code = reverse_condition (compare_code);
21170 if (new_code != UNKNOWN)
21172 cf = ct;
21173 ct = 0;
21174 code = new_code;
21178 if (compare_code != UNKNOWN)
21180 /* notl op1 (if needed)
21181 sarl $31, op1
21182 andl (cf-ct), op1
21183 addl ct, op1
21185 For x < 0 (resp. x <= -1) there will be no notl,
21186 so if possible swap the constants to get rid of the
21187 complement.
21188 True/false will be -1/0 while code below (store flag
21189 followed by decrement) is 0/-1, so the constants need
21190 to be exchanged once more. */
21192 if (compare_code == GE || !cf)
21194 code = reverse_condition (code);
21195 compare_code = LT;
21197 else
21198 std::swap (ct, cf);
21200 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21202 else
21204 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21206 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21207 constm1_rtx,
21208 copy_rtx (out), 1, OPTAB_DIRECT);
21211 out = expand_simple_binop (mode, AND, copy_rtx (out),
21212 gen_int_mode (cf - ct, mode),
21213 copy_rtx (out), 1, OPTAB_DIRECT);
21214 if (ct)
21215 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21216 copy_rtx (out), 1, OPTAB_DIRECT);
21217 if (!rtx_equal_p (out, operands[0]))
21218 emit_move_insn (operands[0], copy_rtx (out));
21220 return true;
21224 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21226 /* Try a few things more with specific constants and a variable. */
21228 optab op;
21229 rtx var, orig_out, out, tmp;
21231 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21232 return false;
21234 /* If one of the two operands is an interesting constant, load a
21235 constant with the above and mask it in with a logical operation. */
21237 if (CONST_INT_P (operands[2]))
21239 var = operands[3];
21240 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21241 operands[3] = constm1_rtx, op = and_optab;
21242 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21243 operands[3] = const0_rtx, op = ior_optab;
21244 else
21245 return false;
21247 else if (CONST_INT_P (operands[3]))
21249 var = operands[2];
21250 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21251 operands[2] = constm1_rtx, op = and_optab;
21252 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21253 operands[2] = const0_rtx, op = ior_optab;
21254 else
21255 return false;
21257 else
21258 return false;
21260 orig_out = operands[0];
21261 tmp = gen_reg_rtx (mode);
21262 operands[0] = tmp;
21264 /* Recurse to get the constant loaded. */
21265 if (!ix86_expand_int_movcc (operands))
21266 return false;
21268 /* Mask in the interesting variable. */
21269 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21270 OPTAB_WIDEN);
21271 if (!rtx_equal_p (out, orig_out))
21272 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21274 return true;
21278 * For comparison with above,
21280 * movl cf,dest
21281 * movl ct,tmp
21282 * cmpl op1,op2
21283 * cmovcc tmp,dest
21285 * Size 15.
21288 if (! nonimmediate_operand (operands[2], mode))
21289 operands[2] = force_reg (mode, operands[2]);
21290 if (! nonimmediate_operand (operands[3], mode))
21291 operands[3] = force_reg (mode, operands[3]);
21293 if (! register_operand (operands[2], VOIDmode)
21294 && (mode == QImode
21295 || ! register_operand (operands[3], VOIDmode)))
21296 operands[2] = force_reg (mode, operands[2]);
21298 if (mode == QImode
21299 && ! register_operand (operands[3], VOIDmode))
21300 operands[3] = force_reg (mode, operands[3]);
21302 emit_insn (compare_seq);
21303 emit_insn (gen_rtx_SET (operands[0],
21304 gen_rtx_IF_THEN_ELSE (mode,
21305 compare_op, operands[2],
21306 operands[3])));
21307 return true;
21310 /* Swap, force into registers, or otherwise massage the two operands
21311 to an sse comparison with a mask result. Thus we differ a bit from
21312 ix86_prepare_fp_compare_args which expects to produce a flags result.
21314 The DEST operand exists to help determine whether to commute commutative
21315 operators. The POP0/POP1 operands are updated in place. The new
21316 comparison code is returned, or UNKNOWN if not implementable. */
21318 static enum rtx_code
21319 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21320 rtx *pop0, rtx *pop1)
21322 switch (code)
21324 case LTGT:
21325 case UNEQ:
21326 /* AVX supports all the needed comparisons. */
21327 if (TARGET_AVX)
21328 break;
21329 /* We have no LTGT as an operator. We could implement it with
21330 NE & ORDERED, but this requires an extra temporary. It's
21331 not clear that it's worth it. */
21332 return UNKNOWN;
21334 case LT:
21335 case LE:
21336 case UNGT:
21337 case UNGE:
21338 /* These are supported directly. */
21339 break;
21341 case EQ:
21342 case NE:
21343 case UNORDERED:
21344 case ORDERED:
21345 /* AVX has 3 operand comparisons, no need to swap anything. */
21346 if (TARGET_AVX)
21347 break;
21348 /* For commutative operators, try to canonicalize the destination
21349 operand to be first in the comparison - this helps reload to
21350 avoid extra moves. */
21351 if (!dest || !rtx_equal_p (dest, *pop1))
21352 break;
21353 /* FALLTHRU */
21355 case GE:
21356 case GT:
21357 case UNLE:
21358 case UNLT:
21359 /* These are not supported directly before AVX, and furthermore
21360 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21361 comparison operands to transform into something that is
21362 supported. */
21363 std::swap (*pop0, *pop1);
21364 code = swap_condition (code);
21365 break;
21367 default:
21368 gcc_unreachable ();
21371 return code;
21374 /* Detect conditional moves that exactly match min/max operational
21375 semantics. Note that this is IEEE safe, as long as we don't
21376 interchange the operands.
21378 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21379 and TRUE if the operation is successful and instructions are emitted. */
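/* For example, the scalar conditional move a < b ? a : b is recognized here
   as a minimum: with both flag_finite_math_only and
   flag_unsafe_math_optimizations set it becomes a plain SMIN (minss/minsd at
   the insn level), otherwise it is wrapped in UNSPEC_IEEE_MIN so the operand
   order, and with it the IEEE treatment of NaNs and signed zeros, is kept.  */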
21381 static bool
21382 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21383 rtx cmp_op1, rtx if_true, rtx if_false)
21385 machine_mode mode;
21386 bool is_min;
21387 rtx tmp;
21389 if (code == LT)
21391 else if (code == UNGE)
21392 std::swap (if_true, if_false);
21393 else
21394 return false;
21396 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21397 is_min = true;
21398 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21399 is_min = false;
21400 else
21401 return false;
21403 mode = GET_MODE (dest);
21405 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21406 but MODE may be a vector mode and thus not appropriate. */
21407 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21409 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21410 rtvec v;
21412 if_true = force_reg (mode, if_true);
21413 v = gen_rtvec (2, if_true, if_false);
21414 tmp = gen_rtx_UNSPEC (mode, v, u);
21416 else
21418 code = is_min ? SMIN : SMAX;
21419 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21422 emit_insn (gen_rtx_SET (dest, tmp));
21423 return true;
21426 /* Expand an sse vector comparison. Return the register with the result. */
21428 static rtx
21429 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21430 rtx op_true, rtx op_false)
21432 machine_mode mode = GET_MODE (dest);
21433 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21435 /* In the general case the result of the comparison can differ from the operands' type. */
21436 machine_mode cmp_mode;
21438 /* In AVX512F the result of comparison is an integer mask. */
21439 bool maskcmp = false;
21440 rtx x;
21442 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21444 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21445 gcc_assert (cmp_mode != BLKmode);
21447 maskcmp = true;
21449 else
21450 cmp_mode = cmp_ops_mode;
21453 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21454 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21455 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21457 if (optimize
21458 || reg_overlap_mentioned_p (dest, op_true)
21459 || reg_overlap_mentioned_p (dest, op_false))
21460 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21462 /* Compare patterns for int modes are unspec in AVX512F only. */
21463 if (maskcmp && (code == GT || code == EQ))
21465 rtx (*gen)(rtx, rtx, rtx);
21467 switch (cmp_ops_mode)
21469 case V64QImode:
21470 gcc_assert (TARGET_AVX512BW);
21471 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21472 break;
21473 case V32HImode:
21474 gcc_assert (TARGET_AVX512BW);
21475 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21476 break;
21477 case V16SImode:
21478 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21479 break;
21480 case V8DImode:
21481 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21482 break;
21483 default:
21484 gen = NULL;
21487 if (gen)
21489 emit_insn (gen (dest, cmp_op0, cmp_op1));
21490 return dest;
21493 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21495 if (cmp_mode != mode && !maskcmp)
21497 x = force_reg (cmp_ops_mode, x);
21498 convert_move (dest, x, false);
21500 else
21501 emit_insn (gen_rtx_SET (dest, x));
21503 return dest;
21506 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21507 operations. This is used for both scalar and vector conditional moves. */
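/* In the absence of a blend instruction the fallback at the bottom of this
   function computes dest = (cmp & op_true) | (~cmp & op_false), relying on
   the comparison result being all-ones or all-zeros per element.  */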
21509 static void
21510 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21512 machine_mode mode = GET_MODE (dest);
21513 machine_mode cmpmode = GET_MODE (cmp);
21515 /* In AVX512F the result of comparison is an integer mask. */
21516 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21518 rtx t2, t3, x;
21520 if (vector_all_ones_operand (op_true, mode)
21521 && rtx_equal_p (op_false, CONST0_RTX (mode))
21522 && !maskcmp)
21524 emit_insn (gen_rtx_SET (dest, cmp));
21526 else if (op_false == CONST0_RTX (mode)
21527 && !maskcmp)
21529 op_true = force_reg (mode, op_true);
21530 x = gen_rtx_AND (mode, cmp, op_true);
21531 emit_insn (gen_rtx_SET (dest, x));
21533 else if (op_true == CONST0_RTX (mode)
21534 && !maskcmp)
21536 op_false = force_reg (mode, op_false);
21537 x = gen_rtx_NOT (mode, cmp);
21538 x = gen_rtx_AND (mode, x, op_false);
21539 emit_insn (gen_rtx_SET (dest, x));
21541 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21542 && !maskcmp)
21544 op_false = force_reg (mode, op_false);
21545 x = gen_rtx_IOR (mode, cmp, op_false);
21546 emit_insn (gen_rtx_SET (dest, x));
21548 else if (TARGET_XOP
21549 && !maskcmp)
21551 op_true = force_reg (mode, op_true);
21553 if (!nonimmediate_operand (op_false, mode))
21554 op_false = force_reg (mode, op_false);
21556 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
21557 op_true,
21558 op_false)));
21560 else
21562 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21563 rtx d = dest;
21565 if (!nonimmediate_operand (op_true, mode))
21566 op_true = force_reg (mode, op_true);
21568 op_false = force_reg (mode, op_false);
21570 switch (mode)
21572 case V4SFmode:
21573 if (TARGET_SSE4_1)
21574 gen = gen_sse4_1_blendvps;
21575 break;
21576 case V2DFmode:
21577 if (TARGET_SSE4_1)
21578 gen = gen_sse4_1_blendvpd;
21579 break;
21580 case V16QImode:
21581 case V8HImode:
21582 case V4SImode:
21583 case V2DImode:
21584 if (TARGET_SSE4_1)
21586 gen = gen_sse4_1_pblendvb;
21587 if (mode != V16QImode)
21588 d = gen_reg_rtx (V16QImode);
21589 op_false = gen_lowpart (V16QImode, op_false);
21590 op_true = gen_lowpart (V16QImode, op_true);
21591 cmp = gen_lowpart (V16QImode, cmp);
21593 break;
21594 case V8SFmode:
21595 if (TARGET_AVX)
21596 gen = gen_avx_blendvps256;
21597 break;
21598 case V4DFmode:
21599 if (TARGET_AVX)
21600 gen = gen_avx_blendvpd256;
21601 break;
21602 case V32QImode:
21603 case V16HImode:
21604 case V8SImode:
21605 case V4DImode:
21606 if (TARGET_AVX2)
21608 gen = gen_avx2_pblendvb;
21609 if (mode != V32QImode)
21610 d = gen_reg_rtx (V32QImode);
21611 op_false = gen_lowpart (V32QImode, op_false);
21612 op_true = gen_lowpart (V32QImode, op_true);
21613 cmp = gen_lowpart (V32QImode, cmp);
21615 break;
21617 case V64QImode:
21618 gen = gen_avx512bw_blendmv64qi;
21619 break;
21620 case V32HImode:
21621 gen = gen_avx512bw_blendmv32hi;
21622 break;
21623 case V16SImode:
21624 gen = gen_avx512f_blendmv16si;
21625 break;
21626 case V8DImode:
21627 gen = gen_avx512f_blendmv8di;
21628 break;
21629 case V8DFmode:
21630 gen = gen_avx512f_blendmv8df;
21631 break;
21632 case V16SFmode:
21633 gen = gen_avx512f_blendmv16sf;
21634 break;
21636 default:
21637 break;
21640 if (gen != NULL)
21642 emit_insn (gen (d, op_false, op_true, cmp));
21643 if (d != dest)
21644 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21646 else
21648 op_true = force_reg (mode, op_true);
21650 t2 = gen_reg_rtx (mode);
21651 if (optimize)
21652 t3 = gen_reg_rtx (mode);
21653 else
21654 t3 = dest;
21656 x = gen_rtx_AND (mode, op_true, cmp);
21657 emit_insn (gen_rtx_SET (t2, x));
21659 x = gen_rtx_NOT (mode, cmp);
21660 x = gen_rtx_AND (mode, x, op_false);
21661 emit_insn (gen_rtx_SET (t3, x));
21663 x = gen_rtx_IOR (mode, t3, t2);
21664 emit_insn (gen_rtx_SET (dest, x));
21669 /* Expand a floating-point conditional move. Return true if successful. */
21671 bool
21672 ix86_expand_fp_movcc (rtx operands[])
21674 machine_mode mode = GET_MODE (operands[0]);
21675 enum rtx_code code = GET_CODE (operands[1]);
21676 rtx tmp, compare_op;
21677 rtx op0 = XEXP (operands[1], 0);
21678 rtx op1 = XEXP (operands[1], 1);
21680 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21682 machine_mode cmode;
21684 /* Since we've no cmove for sse registers, don't force bad register
21685 allocation just to gain access to it. Deny movcc when the
21686 comparison mode doesn't match the move mode. */
21687 cmode = GET_MODE (op0);
21688 if (cmode == VOIDmode)
21689 cmode = GET_MODE (op1);
21690 if (cmode != mode)
21691 return false;
21693 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21694 if (code == UNKNOWN)
21695 return false;
21697 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21698 operands[2], operands[3]))
21699 return true;
21701 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21702 operands[2], operands[3]);
21703 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21704 return true;
21707 if (GET_MODE (op0) == TImode
21708 || (GET_MODE (op0) == DImode
21709 && !TARGET_64BIT))
21710 return false;
21712 /* The floating point conditional move instructions don't directly
21713 support conditions resulting from a signed integer comparison. */
21715 compare_op = ix86_expand_compare (code, op0, op1);
21716 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21718 tmp = gen_reg_rtx (QImode);
21719 ix86_expand_setcc (tmp, code, op0, op1);
21721 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21724 emit_insn (gen_rtx_SET (operands[0],
21725 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21726 operands[2], operands[3])));
21728 return true;
21731 /* Expand a floating-point vector conditional move; a vcond operation
21732 rather than a movcc operation. */
21734 bool
21735 ix86_expand_fp_vcond (rtx operands[])
21737 enum rtx_code code = GET_CODE (operands[3]);
21738 rtx cmp;
21740 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21741 &operands[4], &operands[5]);
21742 if (code == UNKNOWN)
21744 rtx temp;
21745 switch (GET_CODE (operands[3]))
21747 case LTGT:
21748 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21749 operands[5], operands[0], operands[0]);
21750 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21751 operands[5], operands[1], operands[2]);
21752 code = AND;
21753 break;
21754 case UNEQ:
21755 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21756 operands[5], operands[0], operands[0]);
21757 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21758 operands[5], operands[1], operands[2]);
21759 code = IOR;
21760 break;
21761 default:
21762 gcc_unreachable ();
21764 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21765 OPTAB_DIRECT);
21766 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21767 return true;
21770 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21771 operands[5], operands[1], operands[2]))
21772 return true;
21774 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21775 operands[1], operands[2]);
21776 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21777 return true;
21780 /* Expand a signed/unsigned integral vector conditional move. */
21782 bool
21783 ix86_expand_int_vcond (rtx operands[])
21785 machine_mode data_mode = GET_MODE (operands[0]);
21786 machine_mode mode = GET_MODE (operands[4]);
21787 enum rtx_code code = GET_CODE (operands[3]);
21788 bool negate = false;
21789 rtx x, cop0, cop1;
21791 cop0 = operands[4];
21792 cop1 = operands[5];
21794 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21795 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
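/* E.g. for V4SImode, an element x = -7 gives x >> 31 = -1 under an
   arithmetic shift and 1 under a logical shift, while x = 7 gives 0 in both
   cases, which is exactly the -1/0 resp. 1/0 selection requested above.  */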
21796 if ((code == LT || code == GE)
21797 && data_mode == mode
21798 && cop1 == CONST0_RTX (mode)
21799 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21800 && GET_MODE_UNIT_SIZE (data_mode) > 1
21801 && GET_MODE_UNIT_SIZE (data_mode) <= 8
21802 && (GET_MODE_SIZE (data_mode) == 16
21803 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21805 rtx negop = operands[2 - (code == LT)];
21806 int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1;
21807 if (negop == CONST1_RTX (data_mode))
21809 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21810 operands[0], 1, OPTAB_DIRECT);
21811 if (res != operands[0])
21812 emit_move_insn (operands[0], res);
21813 return true;
21815 else if (GET_MODE_INNER (data_mode) != DImode
21816 && vector_all_ones_operand (negop, data_mode))
21818 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21819 operands[0], 0, OPTAB_DIRECT);
21820 if (res != operands[0])
21821 emit_move_insn (operands[0], res);
21822 return true;
21826 if (!nonimmediate_operand (cop1, mode))
21827 cop1 = force_reg (mode, cop1);
21828 if (!general_operand (operands[1], data_mode))
21829 operands[1] = force_reg (data_mode, operands[1]);
21830 if (!general_operand (operands[2], data_mode))
21831 operands[2] = force_reg (data_mode, operands[2]);
21833 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21834 if (TARGET_XOP
21835 && (mode == V16QImode || mode == V8HImode
21836 || mode == V4SImode || mode == V2DImode))
21838 else
21840 /* Canonicalize the comparison to EQ, GT, GTU. */
21841 switch (code)
21843 case EQ:
21844 case GT:
21845 case GTU:
21846 break;
21848 case NE:
21849 case LE:
21850 case LEU:
21851 code = reverse_condition (code);
21852 negate = true;
21853 break;
21855 case GE:
21856 case GEU:
21857 code = reverse_condition (code);
21858 negate = true;
21859 /* FALLTHRU */
21861 case LT:
21862 case LTU:
21863 std::swap (cop0, cop1);
21864 code = swap_condition (code);
21865 break;
21867 default:
21868 gcc_unreachable ();
21871 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21872 if (mode == V2DImode)
21874 switch (code)
21876 case EQ:
21877 /* SSE4.1 supports EQ. */
21878 if (!TARGET_SSE4_1)
21879 return false;
21880 break;
21882 case GT:
21883 case GTU:
21884 /* SSE4.2 supports GT/GTU. */
21885 if (!TARGET_SSE4_2)
21886 return false;
21887 break;
21889 default:
21890 gcc_unreachable ();
21894 /* Unsigned parallel compare is not supported by the hardware.
21895 Play some tricks to turn this into a signed comparison
21896 against 0. */
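/* One way to see the trick for the dword/qword case below: flipping the
   sign bit (subtracting 0x80000000 for 32-bit elements) maps unsigned order
   onto signed order, e.g. 0x80000001 >u 1 becomes 1 >s 0x80000001
   (= -2147483647), which is still true.  */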
21897 if (code == GTU)
21899 cop0 = force_reg (mode, cop0);
21901 switch (mode)
21903 case V16SImode:
21904 case V8DImode:
21905 case V8SImode:
21906 case V4DImode:
21907 case V4SImode:
21908 case V2DImode:
21910 rtx t1, t2, mask;
21911 rtx (*gen_sub3) (rtx, rtx, rtx);
21913 switch (mode)
21915 case V16SImode: gen_sub3 = gen_subv16si3; break;
21916 case V8DImode: gen_sub3 = gen_subv8di3; break;
21917 case V8SImode: gen_sub3 = gen_subv8si3; break;
21918 case V4DImode: gen_sub3 = gen_subv4di3; break;
21919 case V4SImode: gen_sub3 = gen_subv4si3; break;
21920 case V2DImode: gen_sub3 = gen_subv2di3; break;
21921 default:
21922 gcc_unreachable ();
21924 /* Subtract (-(INT MAX) - 1) from both operands to make
21925 them signed. */
21926 mask = ix86_build_signbit_mask (mode, true, false);
21927 t1 = gen_reg_rtx (mode);
21928 emit_insn (gen_sub3 (t1, cop0, mask));
21930 t2 = gen_reg_rtx (mode);
21931 emit_insn (gen_sub3 (t2, cop1, mask));
21933 cop0 = t1;
21934 cop1 = t2;
21935 code = GT;
21937 break;
21939 case V64QImode:
21940 case V32HImode:
21941 case V32QImode:
21942 case V16HImode:
21943 case V16QImode:
21944 case V8HImode:
21945 /* Perform a parallel unsigned saturating subtraction. */
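/* The saturating subtraction gives a - b when a > b and 0 otherwise, so
   testing the result against zero (the EQ just below, with NEGATE flipped)
   recovers the unsigned a > b test, e.g. bytes a = 5, b = 9 give 0 and
   therefore "not greater".  */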
21946 x = gen_reg_rtx (mode);
21947 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
21949 cop0 = x;
21950 cop1 = CONST0_RTX (mode);
21951 code = EQ;
21952 negate = !negate;
21953 break;
21955 default:
21956 gcc_unreachable ();
21961 /* Allow the comparison to be done in one mode, but the movcc to
21962 happen in another mode. */
21963 if (data_mode == mode)
21965 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21966 operands[1+negate], operands[2-negate]);
21968 else
21970 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21971 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21972 operands[1+negate], operands[2-negate]);
21973 if (GET_MODE (x) == mode)
21974 x = gen_lowpart (data_mode, x);
21977 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21978 operands[2-negate]);
21979 return true;
21982 /* AVX512F does support 64-byte integer vector operations,
21983 thus the longest vector we are faced with is V64QImode. */
21984 #define MAX_VECT_LEN 64
21986 struct expand_vec_perm_d
21988 rtx target, op0, op1;
21989 unsigned char perm[MAX_VECT_LEN];
21990 machine_mode vmode;
21991 unsigned char nelt;
21992 bool one_operand_p;
21993 bool testing_p;
21996 static bool
21997 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21998 struct expand_vec_perm_d *d)
22000 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22001 expander, so args are either in d, or in op0, op1 etc. */
22002 machine_mode mode = GET_MODE (d ? d->op0 : op0);
22003 machine_mode maskmode = mode;
22004 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
22006 switch (mode)
22008 case V8HImode:
22009 if (TARGET_AVX512VL && TARGET_AVX512BW)
22010 gen = gen_avx512vl_vpermi2varv8hi3;
22011 break;
22012 case V16HImode:
22013 if (TARGET_AVX512VL && TARGET_AVX512BW)
22014 gen = gen_avx512vl_vpermi2varv16hi3;
22015 break;
22016 case V64QImode:
22017 if (TARGET_AVX512VBMI)
22018 gen = gen_avx512bw_vpermi2varv64qi3;
22019 break;
22020 case V32HImode:
22021 if (TARGET_AVX512BW)
22022 gen = gen_avx512bw_vpermi2varv32hi3;
22023 break;
22024 case V4SImode:
22025 if (TARGET_AVX512VL)
22026 gen = gen_avx512vl_vpermi2varv4si3;
22027 break;
22028 case V8SImode:
22029 if (TARGET_AVX512VL)
22030 gen = gen_avx512vl_vpermi2varv8si3;
22031 break;
22032 case V16SImode:
22033 if (TARGET_AVX512F)
22034 gen = gen_avx512f_vpermi2varv16si3;
22035 break;
22036 case V4SFmode:
22037 if (TARGET_AVX512VL)
22039 gen = gen_avx512vl_vpermi2varv4sf3;
22040 maskmode = V4SImode;
22042 break;
22043 case V8SFmode:
22044 if (TARGET_AVX512VL)
22046 gen = gen_avx512vl_vpermi2varv8sf3;
22047 maskmode = V8SImode;
22049 break;
22050 case V16SFmode:
22051 if (TARGET_AVX512F)
22053 gen = gen_avx512f_vpermi2varv16sf3;
22054 maskmode = V16SImode;
22056 break;
22057 case V2DImode:
22058 if (TARGET_AVX512VL)
22059 gen = gen_avx512vl_vpermi2varv2di3;
22060 break;
22061 case V4DImode:
22062 if (TARGET_AVX512VL)
22063 gen = gen_avx512vl_vpermi2varv4di3;
22064 break;
22065 case V8DImode:
22066 if (TARGET_AVX512F)
22067 gen = gen_avx512f_vpermi2varv8di3;
22068 break;
22069 case V2DFmode:
22070 if (TARGET_AVX512VL)
22072 gen = gen_avx512vl_vpermi2varv2df3;
22073 maskmode = V2DImode;
22075 break;
22076 case V4DFmode:
22077 if (TARGET_AVX512VL)
22079 gen = gen_avx512vl_vpermi2varv4df3;
22080 maskmode = V4DImode;
22082 break;
22083 case V8DFmode:
22084 if (TARGET_AVX512F)
22086 gen = gen_avx512f_vpermi2varv8df3;
22087 maskmode = V8DImode;
22089 break;
22090 default:
22091 break;
22094 if (gen == NULL)
22095 return false;
22097 /* ix86_expand_vec_perm_vpermi2 is called from both const and non-const
22098 expander, so args are either in d, or in op0, op1 etc. */
22099 if (d)
22101 rtx vec[64];
22102 target = d->target;
22103 op0 = d->op0;
22104 op1 = d->op1;
22105 for (int i = 0; i < d->nelt; ++i)
22106 vec[i] = GEN_INT (d->perm[i]);
22107 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
22110 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
22111 return true;
22114 /* Expand a variable vector permutation. */
22116 void
22117 ix86_expand_vec_perm (rtx operands[])
22119 rtx target = operands[0];
22120 rtx op0 = operands[1];
22121 rtx op1 = operands[2];
22122 rtx mask = operands[3];
22123 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22124 machine_mode mode = GET_MODE (op0);
22125 machine_mode maskmode = GET_MODE (mask);
22126 int w, e, i;
22127 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22129 /* Number of elements in the vector. */
22130 w = GET_MODE_NUNITS (mode);
22131 e = GET_MODE_UNIT_SIZE (mode);
22132 gcc_assert (w <= 64);
22134 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22135 return;
22137 if (TARGET_AVX2)
22139 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22141 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22142 a constant shuffle operand. With a tiny bit of effort we can
22143 use VPERMD instead. A re-interpretation stall for V4DFmode is
22144 unfortunate but there's no avoiding it.
22145 Similarly, for V16HImode we don't have instructions for variable
22146 shuffling, while for V32QImode we can, after preparing suitable
22147 masks, use vpshufb; vpshufb; vpermq; vpor. */
22149 if (mode == V16HImode)
22151 maskmode = mode = V32QImode;
22152 w = 32;
22153 e = 1;
22155 else
22157 maskmode = mode = V8SImode;
22158 w = 8;
22159 e = 4;
22161 t1 = gen_reg_rtx (maskmode);
22163 /* Replicate the low bits of the V4DImode mask into V8SImode:
22164 mask = { A B C D }
22165 t1 = { A A B B C C D D }. */
22166 for (i = 0; i < w / 2; ++i)
22167 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22168 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22169 vt = force_reg (maskmode, vt);
22170 mask = gen_lowpart (maskmode, mask);
22171 if (maskmode == V8SImode)
22172 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22173 else
22174 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22176 /* Multiply the shuffle indices by two. */
22177 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22178 OPTAB_DIRECT);
22180 /* Add one to the odd shuffle indices:
22181 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22182 for (i = 0; i < w / 2; ++i)
22184 vec[i * 2] = const0_rtx;
22185 vec[i * 2 + 1] = const1_rtx;
22187 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22188 vt = validize_mem (force_const_mem (maskmode, vt));
22189 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22190 OPTAB_DIRECT);
22192 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22193 operands[3] = mask = t1;
22194 target = gen_reg_rtx (mode);
22195 op0 = gen_lowpart (mode, op0);
22196 op1 = gen_lowpart (mode, op1);
22199 switch (mode)
22201 case V8SImode:
22202 /* The VPERMD and VPERMPS instructions already properly ignore
22203 the high bits of the shuffle elements. No need for us to
22204 perform an AND ourselves. */
22205 if (one_operand_shuffle)
22207 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22208 if (target != operands[0])
22209 emit_move_insn (operands[0],
22210 gen_lowpart (GET_MODE (operands[0]), target));
22212 else
22214 t1 = gen_reg_rtx (V8SImode);
22215 t2 = gen_reg_rtx (V8SImode);
22216 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22217 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22218 goto merge_two;
22220 return;
22222 case V8SFmode:
22223 mask = gen_lowpart (V8SImode, mask);
22224 if (one_operand_shuffle)
22225 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22226 else
22228 t1 = gen_reg_rtx (V8SFmode);
22229 t2 = gen_reg_rtx (V8SFmode);
22230 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22231 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22232 goto merge_two;
22234 return;
22236 case V4SImode:
22237 /* By combining the two 128-bit input vectors into one 256-bit
22238 input vector, we can use VPERMD and VPERMPS for the full
22239 two-operand shuffle. */
22240 t1 = gen_reg_rtx (V8SImode);
22241 t2 = gen_reg_rtx (V8SImode);
22242 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22243 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22244 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22245 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22246 return;
22248 case V4SFmode:
22249 t1 = gen_reg_rtx (V8SFmode);
22250 t2 = gen_reg_rtx (V8SImode);
22251 mask = gen_lowpart (V4SImode, mask);
22252 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22253 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22254 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22255 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22256 return;
22258 case V32QImode:
22259 t1 = gen_reg_rtx (V32QImode);
22260 t2 = gen_reg_rtx (V32QImode);
22261 t3 = gen_reg_rtx (V32QImode);
22262 vt2 = GEN_INT (-128);
22263 for (i = 0; i < 32; i++)
22264 vec[i] = vt2;
22265 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22266 vt = force_reg (V32QImode, vt);
22267 for (i = 0; i < 32; i++)
22268 vec[i] = i < 16 ? vt2 : const0_rtx;
22269 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22270 vt2 = force_reg (V32QImode, vt2);
22271 /* From mask create two adjusted masks, which contain the same
22272 bits as mask in the low 7 bits of each vector element.
22273 The first mask will have the most significant bit clear
22274 if it requests element from the same 128-bit lane
22275 and MSB set if it requests element from the other 128-bit lane.
22276 The second mask will have the opposite values of the MSB,
22277 and additionally will have its 128-bit lanes swapped.
22278 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22279 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22280 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22281 stands for the other 12 bytes. */
22282 /* The bit that tells whether an element is from the same lane or the
22283 other lane is bit 4, so shift it up by 3 to the MSB position. */
22284 t5 = gen_reg_rtx (V4DImode);
22285 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22286 GEN_INT (3)));
22287 /* Clear MSB bits from the mask just in case it had them set. */
22288 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22289 /* After this t1 will have MSB set for elements from other lane. */
22290 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22291 /* Clear bits other than MSB. */
22292 emit_insn (gen_andv32qi3 (t1, t1, vt));
22293 /* Or in the lower bits from mask into t3. */
22294 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22295 /* And invert MSB bits in t1, so MSB is set for elements from the same
22296 lane. */
22297 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22298 /* Swap 128-bit lanes in t3. */
22299 t6 = gen_reg_rtx (V4DImode);
22300 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22301 const2_rtx, GEN_INT (3),
22302 const0_rtx, const1_rtx));
22303 /* And or in the lower bits from mask into t1. */
22304 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22305 if (one_operand_shuffle)
22307 /* Each of these shuffles will put 0s in places where
22308 element from the other 128-bit lane is needed, otherwise
22309 will shuffle in the requested value. */
22310 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22311 gen_lowpart (V32QImode, t6)));
22312 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22313 /* For t3 the 128-bit lanes are swapped again. */
22314 t7 = gen_reg_rtx (V4DImode);
22315 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22316 const2_rtx, GEN_INT (3),
22317 const0_rtx, const1_rtx));
22318 /* And oring both together leads to the result. */
22319 emit_insn (gen_iorv32qi3 (target, t1,
22320 gen_lowpart (V32QImode, t7)));
22321 if (target != operands[0])
22322 emit_move_insn (operands[0],
22323 gen_lowpart (GET_MODE (operands[0]), target));
22324 return;
22327 t4 = gen_reg_rtx (V32QImode);
22328 /* Similar to the one_operand_shuffle code above,
22329 just repeated twice, once for each operand. The code at the
22330 merge_two: label below merges the two results together. */
22331 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22332 gen_lowpart (V32QImode, t6)));
22333 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22334 gen_lowpart (V32QImode, t6)));
22335 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22336 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22337 t7 = gen_reg_rtx (V4DImode);
22338 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22339 const2_rtx, GEN_INT (3),
22340 const0_rtx, const1_rtx));
22341 t8 = gen_reg_rtx (V4DImode);
22342 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22343 const2_rtx, GEN_INT (3),
22344 const0_rtx, const1_rtx));
22345 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22346 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22347 t1 = t4;
22348 t2 = t3;
22349 goto merge_two;
22351 default:
22352 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22353 break;
22357 if (TARGET_XOP)
22359 /* The XOP VPPERM insn supports three inputs. By ignoring the
22360 one_operand_shuffle special case, we avoid creating another
22361 set of constant vectors in memory. */
22362 one_operand_shuffle = false;
22364 /* mask = mask & {2*w-1, ...} */
22365 vt = GEN_INT (2*w - 1);
22367 else
22369 /* mask = mask & {w-1, ...} */
22370 vt = GEN_INT (w - 1);
22373 for (i = 0; i < w; i++)
22374 vec[i] = vt;
22375 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22376 mask = expand_simple_binop (maskmode, AND, mask, vt,
22377 NULL_RTX, 0, OPTAB_DIRECT);
22379 /* For non-QImode operations, convert the word permutation control
22380 into a byte permutation control. */
22381 if (mode != V16QImode)
22383 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22384 GEN_INT (exact_log2 (e)),
22385 NULL_RTX, 0, OPTAB_DIRECT);
22387 /* Convert mask to vector of chars. */
22388 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22390 /* Replicate each of the input bytes into byte positions:
22391 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22392 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22393 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22394 for (i = 0; i < 16; ++i)
22395 vec[i] = GEN_INT (i/e * e);
22396 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22397 vt = validize_mem (force_const_mem (V16QImode, vt));
22398 if (TARGET_XOP)
22399 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22400 else
22401 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22403 /* Convert it into the byte positions by doing
22404 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
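/* Rough illustration for V4SImode (e = 4): a word index of 2 is first
   scaled to 8 by the shift above, replicated to { 8, 8, 8, 8 } within its
   byte group by the pshufb/pperm step, and then turned into the byte
   indices { 8, 9, 10, 11 } by the addition below.  */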
22405 for (i = 0; i < 16; ++i)
22406 vec[i] = GEN_INT (i % e);
22407 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22408 vt = validize_mem (force_const_mem (V16QImode, vt));
22409 emit_insn (gen_addv16qi3 (mask, mask, vt));
22412 /* The actual shuffle operations all operate on V16QImode. */
22413 op0 = gen_lowpart (V16QImode, op0);
22414 op1 = gen_lowpart (V16QImode, op1);
22416 if (TARGET_XOP)
22418 if (GET_MODE (target) != V16QImode)
22419 target = gen_reg_rtx (V16QImode);
22420 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22421 if (target != operands[0])
22422 emit_move_insn (operands[0],
22423 gen_lowpart (GET_MODE (operands[0]), target));
22425 else if (one_operand_shuffle)
22427 if (GET_MODE (target) != V16QImode)
22428 target = gen_reg_rtx (V16QImode);
22429 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22430 if (target != operands[0])
22431 emit_move_insn (operands[0],
22432 gen_lowpart (GET_MODE (operands[0]), target));
22434 else
22436 rtx xops[6];
22437 bool ok;
22439 /* Shuffle the two input vectors independently. */
22440 t1 = gen_reg_rtx (V16QImode);
22441 t2 = gen_reg_rtx (V16QImode);
22442 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22443 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22445 merge_two:
22446 /* Then merge them together. The key is whether any given control
22447 element contained a bit set that indicates the second word. */
22448 mask = operands[3];
22449 vt = GEN_INT (w);
22450 if (maskmode == V2DImode && !TARGET_SSE4_1)
22452 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22453 more shuffle to convert the V2DI input mask into a V4SI
22454 input mask, at which point the masking that expand_int_vcond
22455 performs will work as desired. */
22456 rtx t3 = gen_reg_rtx (V4SImode);
22457 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22458 const0_rtx, const0_rtx,
22459 const2_rtx, const2_rtx));
22460 mask = t3;
22461 maskmode = V4SImode;
22462 e = w = 4;
22465 for (i = 0; i < w; i++)
22466 vec[i] = vt;
22467 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22468 vt = force_reg (maskmode, vt);
22469 mask = expand_simple_binop (maskmode, AND, mask, vt,
22470 NULL_RTX, 0, OPTAB_DIRECT);
22472 if (GET_MODE (target) != mode)
22473 target = gen_reg_rtx (mode);
22474 xops[0] = target;
22475 xops[1] = gen_lowpart (mode, t2);
22476 xops[2] = gen_lowpart (mode, t1);
22477 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22478 xops[4] = mask;
22479 xops[5] = vt;
22480 ok = ix86_expand_int_vcond (xops);
22481 gcc_assert (ok);
22482 if (target != operands[0])
22483 emit_move_insn (operands[0],
22484 gen_lowpart (GET_MODE (operands[0]), target));
22488 /* Unpack SRC into DEST, the next wider integer vector type. UNSIGNED_P is
22489 true if we should do zero extension, else sign extension. HIGH_P is
22490 true if we want the N/2 high elements, else the low elements. */
22492 void
22493 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22495 machine_mode imode = GET_MODE (src);
22496 rtx tmp;
22498 if (TARGET_SSE4_1)
22500 rtx (*unpack)(rtx, rtx);
22501 rtx (*extract)(rtx, rtx) = NULL;
22502 machine_mode halfmode = BLKmode;
22504 switch (imode)
22506 case V64QImode:
22507 if (unsigned_p)
22508 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22509 else
22510 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22511 halfmode = V32QImode;
22512 extract
22513 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22514 break;
22515 case V32QImode:
22516 if (unsigned_p)
22517 unpack = gen_avx2_zero_extendv16qiv16hi2;
22518 else
22519 unpack = gen_avx2_sign_extendv16qiv16hi2;
22520 halfmode = V16QImode;
22521 extract
22522 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22523 break;
22524 case V32HImode:
22525 if (unsigned_p)
22526 unpack = gen_avx512f_zero_extendv16hiv16si2;
22527 else
22528 unpack = gen_avx512f_sign_extendv16hiv16si2;
22529 halfmode = V16HImode;
22530 extract
22531 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22532 break;
22533 case V16HImode:
22534 if (unsigned_p)
22535 unpack = gen_avx2_zero_extendv8hiv8si2;
22536 else
22537 unpack = gen_avx2_sign_extendv8hiv8si2;
22538 halfmode = V8HImode;
22539 extract
22540 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22541 break;
22542 case V16SImode:
22543 if (unsigned_p)
22544 unpack = gen_avx512f_zero_extendv8siv8di2;
22545 else
22546 unpack = gen_avx512f_sign_extendv8siv8di2;
22547 halfmode = V8SImode;
22548 extract
22549 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22550 break;
22551 case V8SImode:
22552 if (unsigned_p)
22553 unpack = gen_avx2_zero_extendv4siv4di2;
22554 else
22555 unpack = gen_avx2_sign_extendv4siv4di2;
22556 halfmode = V4SImode;
22557 extract
22558 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22559 break;
22560 case V16QImode:
22561 if (unsigned_p)
22562 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22563 else
22564 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22565 break;
22566 case V8HImode:
22567 if (unsigned_p)
22568 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22569 else
22570 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22571 break;
22572 case V4SImode:
22573 if (unsigned_p)
22574 unpack = gen_sse4_1_zero_extendv2siv2di2;
22575 else
22576 unpack = gen_sse4_1_sign_extendv2siv2di2;
22577 break;
22578 default:
22579 gcc_unreachable ();
22582 if (GET_MODE_SIZE (imode) >= 32)
22584 tmp = gen_reg_rtx (halfmode);
22585 emit_insn (extract (tmp, src));
22587 else if (high_p)
22589 /* Shift higher 8 bytes to lower 8 bytes. */
22590 tmp = gen_reg_rtx (V1TImode);
22591 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22592 GEN_INT (64)));
22593 tmp = gen_lowpart (imode, tmp);
22595 else
22596 tmp = src;
22598 emit_insn (unpack (dest, tmp));
22600 else
22602 rtx (*unpack)(rtx, rtx, rtx);
22604 switch (imode)
22606 case V16QImode:
22607 if (high_p)
22608 unpack = gen_vec_interleave_highv16qi;
22609 else
22610 unpack = gen_vec_interleave_lowv16qi;
22611 break;
22612 case V8HImode:
22613 if (high_p)
22614 unpack = gen_vec_interleave_highv8hi;
22615 else
22616 unpack = gen_vec_interleave_lowv8hi;
22617 break;
22618 case V4SImode:
22619 if (high_p)
22620 unpack = gen_vec_interleave_highv4si;
22621 else
22622 unpack = gen_vec_interleave_lowv4si;
22623 break;
22624 default:
22625 gcc_unreachable ();
22628 if (unsigned_p)
22629 tmp = force_reg (imode, CONST0_RTX (imode));
22630 else
22631 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22632 src, pc_rtx, pc_rtx);
22634 rtx tmp2 = gen_reg_rtx (imode);
22635 emit_insn (unpack (tmp2, src, tmp));
22636 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22640 /* Expand conditional increment or decrement using adc/sbb instructions.
22641 The default case using setcc followed by the conditional move can be
22642 done by generic code. */
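/* A typical case handled here: x += (a < b) with unsigned operands turns
   into something like cmpl %ebx, %eax ; adcl $0, %ecx, i.e. the carry from
   the compare is added in directly instead of materializing the 0/1 value.  */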
22643 bool
22644 ix86_expand_int_addcc (rtx operands[])
22646 enum rtx_code code = GET_CODE (operands[1]);
22647 rtx flags;
22648 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22649 rtx compare_op;
22650 rtx val = const0_rtx;
22651 bool fpcmp = false;
22652 machine_mode mode;
22653 rtx op0 = XEXP (operands[1], 0);
22654 rtx op1 = XEXP (operands[1], 1);
22656 if (operands[3] != const1_rtx
22657 && operands[3] != constm1_rtx)
22658 return false;
22659 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22660 return false;
22661 code = GET_CODE (compare_op);
22663 flags = XEXP (compare_op, 0);
22665 if (GET_MODE (flags) == CCFPmode
22666 || GET_MODE (flags) == CCFPUmode)
22668 fpcmp = true;
22669 code = ix86_fp_compare_code_to_integer (code);
22672 if (code != LTU)
22674 val = constm1_rtx;
22675 if (fpcmp)
22676 PUT_CODE (compare_op,
22677 reverse_condition_maybe_unordered
22678 (GET_CODE (compare_op)));
22679 else
22680 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22683 mode = GET_MODE (operands[0]);
22685 /* Construct either adc or sbb insn. */
22686 if ((code == LTU) == (operands[3] == constm1_rtx))
22688 switch (mode)
22690 case QImode:
22691 insn = gen_subqi3_carry;
22692 break;
22693 case HImode:
22694 insn = gen_subhi3_carry;
22695 break;
22696 case SImode:
22697 insn = gen_subsi3_carry;
22698 break;
22699 case DImode:
22700 insn = gen_subdi3_carry;
22701 break;
22702 default:
22703 gcc_unreachable ();
22706 else
22708 switch (mode)
22710 case QImode:
22711 insn = gen_addqi3_carry;
22712 break;
22713 case HImode:
22714 insn = gen_addhi3_carry;
22715 break;
22716 case SImode:
22717 insn = gen_addsi3_carry;
22718 break;
22719 case DImode:
22720 insn = gen_adddi3_carry;
22721 break;
22722 default:
22723 gcc_unreachable ();
22726 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22728 return true;
22732 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22733 but works for floating point parameters and non-offsettable memories.
22734 For pushes, it returns just stack offsets; the values will be saved
22735 in the right order. Up to four parts are generated. */
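/* Illustrative sketch only: on a 32-bit target a DImode quantity comes back
   as two SImode parts, the same decomposition as

     // assumes <stdint.h>
     uint32_t lo = (uint32_t) value;          // parts[0]
     uint32_t hi = (uint32_t) (value >> 32);  // parts[1]

   XFmode contributes a third 4-byte part and TFmode a fourth; a register
   operand yields consecutive hard registers and offsettable memory yields
   addresses at 4-byte increments.  */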
22737 static int
22738 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22740 int size;
22742 if (!TARGET_64BIT)
22743 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22744 else
22745 size = (GET_MODE_SIZE (mode) + 4) / 8;
22747 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22748 gcc_assert (size >= 2 && size <= 4);
22750 /* Optimize constant pool reference to immediates. This is used by fp
22751 moves, that force all constants to memory to allow combining. */
22752 if (MEM_P (operand) && MEM_READONLY_P (operand))
22754 rtx tmp = maybe_get_pool_constant (operand);
22755 if (tmp)
22756 operand = tmp;
22759 if (MEM_P (operand) && !offsettable_memref_p (operand))
22761 /* The only non-offsettable memories we handle are pushes. */
22762 int ok = push_operand (operand, VOIDmode);
22764 gcc_assert (ok);
22766 operand = copy_rtx (operand);
22767 PUT_MODE (operand, word_mode);
22768 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22769 return size;
22772 if (GET_CODE (operand) == CONST_VECTOR)
22774 machine_mode imode = int_mode_for_mode (mode);
22775 /* Caution: if we looked through a constant pool memory above,
22776 the operand may actually have a different mode now. That's
22777 ok, since we want to pun this all the way back to an integer. */
22778 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22779 gcc_assert (operand != NULL);
22780 mode = imode;
22783 if (!TARGET_64BIT)
22785 if (mode == DImode)
22786 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22787 else
22789 int i;
22791 if (REG_P (operand))
22793 gcc_assert (reload_completed);
22794 for (i = 0; i < size; i++)
22795 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22797 else if (offsettable_memref_p (operand))
22799 operand = adjust_address (operand, SImode, 0);
22800 parts[0] = operand;
22801 for (i = 1; i < size; i++)
22802 parts[i] = adjust_address (operand, SImode, 4 * i);
22804 else if (CONST_DOUBLE_P (operand))
22806 REAL_VALUE_TYPE r;
22807 long l[4];
22809 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22810 switch (mode)
22812 case TFmode:
22813 real_to_target (l, &r, mode);
22814 parts[3] = gen_int_mode (l[3], SImode);
22815 parts[2] = gen_int_mode (l[2], SImode);
22816 break;
22817 case XFmode:
22818 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22819 long double may not be 80-bit. */
22820 real_to_target (l, &r, mode);
22821 parts[2] = gen_int_mode (l[2], SImode);
22822 break;
22823 case DFmode:
22824 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22825 break;
22826 default:
22827 gcc_unreachable ();
22829 parts[1] = gen_int_mode (l[1], SImode);
22830 parts[0] = gen_int_mode (l[0], SImode);
22832 else
22833 gcc_unreachable ();
22836 else
22838 if (mode == TImode)
22839 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22840 if (mode == XFmode || mode == TFmode)
22842 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22843 if (REG_P (operand))
22845 gcc_assert (reload_completed);
22846 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22847 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22849 else if (offsettable_memref_p (operand))
22851 operand = adjust_address (operand, DImode, 0);
22852 parts[0] = operand;
22853 parts[1] = adjust_address (operand, upper_mode, 8);
22855 else if (CONST_DOUBLE_P (operand))
22857 REAL_VALUE_TYPE r;
22858 long l[4];
22860 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22861 real_to_target (l, &r, mode);
22863 /* real_to_target puts 32-bit pieces in each long. */
22864 parts[0] =
22865 gen_int_mode
22866 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
22867 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
22868 DImode);
22870 if (upper_mode == SImode)
22871 parts[1] = gen_int_mode (l[2], SImode);
22872 else
22873 parts[1] =
22874 gen_int_mode
22875 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
22876 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
22877 DImode);
22879 else
22880 gcc_unreachable ();
22884 return size;
22887 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22888 The splitter fills operands 2-5 with the destination parts and
22889 operands 6-9 with the source parts, in the correct order, and then
22890 emits the individual moves. */
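/* Why the ordering below matters, as a rough sketch: when a destination
   register is also used to address the source, emitting the low part first
   would clobber the address, e.g. for a DImode load through %eax on ia32

     // wrong:  mov (%eax), %eax   -- the address for the high half is gone
     // right:  mov 4(%eax), %edx  first, then  mov (%eax), %eax

   The code detects such collisions and either reorders the parts or
   rebuilds the address with an lea.  */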
22892 void
22893 ix86_split_long_move (rtx operands[])
22895 rtx part[2][4];
22896 int nparts, i, j;
22897 int push = 0;
22898 int collisions = 0;
22899 machine_mode mode = GET_MODE (operands[0]);
22900 bool collisionparts[4];
22902 /* The DFmode expanders may ask us to move a double.
22903 For a 64-bit target this is a single move. By hiding the fact
22904 here we simplify the i386.md splitters. */
22905 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22907 /* Optimize constant pool reference to immediates. This is used by
22908 fp moves, that force all constants to memory to allow combining. */
22910 if (MEM_P (operands[1])
22911 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22912 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22913 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22914 if (push_operand (operands[0], VOIDmode))
22916 operands[0] = copy_rtx (operands[0]);
22917 PUT_MODE (operands[0], word_mode);
22919 else
22920 operands[0] = gen_lowpart (DImode, operands[0]);
22921 operands[1] = gen_lowpart (DImode, operands[1]);
22922 emit_move_insn (operands[0], operands[1]);
22923 return;
22926 /* The only non-offsettable memory we handle is push. */
22927 if (push_operand (operands[0], VOIDmode))
22928 push = 1;
22929 else
22930 gcc_assert (!MEM_P (operands[0])
22931 || offsettable_memref_p (operands[0]));
22933 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22934 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22936 /* When emitting push, take care for source operands on the stack. */
22937 if (push && MEM_P (operands[1])
22938 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22940 rtx src_base = XEXP (part[1][nparts - 1], 0);
22942 /* Compensate for the stack decrement by 4. */
22943 if (!TARGET_64BIT && nparts == 3
22944 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22945 src_base = plus_constant (Pmode, src_base, 4);
22947 /* src_base refers to the stack pointer and is
22948 automatically decreased by emitted push. */
22949 for (i = 0; i < nparts; i++)
22950 part[1][i] = change_address (part[1][i],
22951 GET_MODE (part[1][i]), src_base);
22954 /* We need to do copy in the right order in case an address register
22955 of the source overlaps the destination. */
22956 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22958 rtx tmp;
22960 for (i = 0; i < nparts; i++)
22962 collisionparts[i]
22963 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22964 if (collisionparts[i])
22965 collisions++;
22968 /* Collision in the middle part can be handled by reordering. */
22969 if (collisions == 1 && nparts == 3 && collisionparts [1])
22971 std::swap (part[0][1], part[0][2]);
22972 std::swap (part[1][1], part[1][2]);
22974 else if (collisions == 1
22975 && nparts == 4
22976 && (collisionparts [1] || collisionparts [2]))
22978 if (collisionparts [1])
22980 std::swap (part[0][1], part[0][2]);
22981 std::swap (part[1][1], part[1][2]);
22983 else
22985 std::swap (part[0][2], part[0][3]);
22986 std::swap (part[1][2], part[1][3]);
22990 /* If there are more collisions, we can't handle it by reordering.
22991 Do an lea to the last part and use only one colliding move. */
22992 else if (collisions > 1)
22994 rtx base, addr, tls_base = NULL_RTX;
22996 collisions = 1;
22998 base = part[0][nparts - 1];
23000 /* Handle the case when the last part isn't valid for lea.
23001 Happens in 64-bit mode storing the 12-byte XFmode. */
23002 if (GET_MODE (base) != Pmode)
23003 base = gen_rtx_REG (Pmode, REGNO (base));
23005 addr = XEXP (part[1][0], 0);
23006 if (TARGET_TLS_DIRECT_SEG_REFS)
23008 struct ix86_address parts;
23009 int ok = ix86_decompose_address (addr, &parts);
23010 gcc_assert (ok);
23011 if (parts.seg == DEFAULT_TLS_SEG_REG)
23013 /* It is not valid to use %gs: or %fs: in
23014 lea though, so we need to remove it from the
23015 address used for lea and add it to each individual
23016 memory load instead. */
23017 addr = copy_rtx (addr);
23018 rtx *x = &addr;
23019 while (GET_CODE (*x) == PLUS)
23021 for (i = 0; i < 2; i++)
23023 rtx u = XEXP (*x, i);
23024 if (GET_CODE (u) == ZERO_EXTEND)
23025 u = XEXP (u, 0);
23026 if (GET_CODE (u) == UNSPEC
23027 && XINT (u, 1) == UNSPEC_TP)
23029 tls_base = XEXP (*x, i);
23030 *x = XEXP (*x, 1 - i);
23031 break;
23034 if (tls_base)
23035 break;
23036 x = &XEXP (*x, 0);
23038 gcc_assert (tls_base);
23041 emit_insn (gen_rtx_SET (base, addr));
23042 if (tls_base)
23043 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
23044 part[1][0] = replace_equiv_address (part[1][0], base);
23045 for (i = 1; i < nparts; i++)
23047 if (tls_base)
23048 base = copy_rtx (base);
23049 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
23050 part[1][i] = replace_equiv_address (part[1][i], tmp);
23055 if (push)
23057 if (!TARGET_64BIT)
23059 if (nparts == 3)
23061 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
23062 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
23063 stack_pointer_rtx, GEN_INT (-4)));
23064 emit_move_insn (part[0][2], part[1][2]);
23066 else if (nparts == 4)
23068 emit_move_insn (part[0][3], part[1][3]);
23069 emit_move_insn (part[0][2], part[1][2]);
23072 else
23074 /* In 64-bit mode we don't have a 32-bit push available. In case this is
23075 a register, it is OK - we will just use the larger counterpart. We also
23076 retype memory - this comes from an attempt to avoid the REX prefix on
23077 moving the second half of a TFmode value. */
23078 if (GET_MODE (part[1][1]) == SImode)
23080 switch (GET_CODE (part[1][1]))
23082 case MEM:
23083 part[1][1] = adjust_address (part[1][1], DImode, 0);
23084 break;
23086 case REG:
23087 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23088 break;
23090 default:
23091 gcc_unreachable ();
23094 if (GET_MODE (part[1][0]) == SImode)
23095 part[1][0] = part[1][1];
23098 emit_move_insn (part[0][1], part[1][1]);
23099 emit_move_insn (part[0][0], part[1][0]);
23100 return;
23103 /* Choose correct order to not overwrite the source before it is copied. */
23104 if ((REG_P (part[0][0])
23105 && REG_P (part[1][1])
23106 && (REGNO (part[0][0]) == REGNO (part[1][1])
23107 || (nparts == 3
23108 && REGNO (part[0][0]) == REGNO (part[1][2]))
23109 || (nparts == 4
23110 && REGNO (part[0][0]) == REGNO (part[1][3]))))
23111 || (collisions > 0
23112 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
23114 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23116 operands[2 + i] = part[0][j];
23117 operands[6 + i] = part[1][j];
23120 else
23122 for (i = 0; i < nparts; i++)
23124 operands[2 + i] = part[0][i];
23125 operands[6 + i] = part[1][i];
23129 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23130 if (optimize_insn_for_size_p ())
23132 for (j = 0; j < nparts - 1; j++)
23133 if (CONST_INT_P (operands[6 + j])
23134 && operands[6 + j] != const0_rtx
23135 && REG_P (operands[2 + j]))
23136 for (i = j; i < nparts - 1; i++)
23137 if (CONST_INT_P (operands[7 + i])
23138 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23139 operands[7 + i] = operands[2 + j];
23142 for (i = 0; i < nparts; i++)
23143 emit_move_insn (operands[2 + i], operands[6 + i]);
23145 return;
23148 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23149 left shift by a constant, either using a single shift or
23150 a sequence of add instructions. */
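/* For reference, the add-based variant relies on x + x == x << 1, so a
   small constant shift can be emitted as a chain of additions
   (illustration only):

     x += x;   // x << 1
     x += x;   // x << 2

   which is what the loop below does whenever COUNT additions are no more
   costly than a single shift by a constant and we are not optimizing for
   size.  */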
23152 static void
23153 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23155 rtx (*insn)(rtx, rtx, rtx);
23157 if (count == 1
23158 || (count * ix86_cost->add <= ix86_cost->shift_const
23159 && !optimize_insn_for_size_p ()))
23161 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23162 while (count-- > 0)
23163 emit_insn (insn (operand, operand, operand));
23165 else
23167 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23168 emit_insn (insn (operand, operand, GEN_INT (count)));
23172 void
23173 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23175 rtx (*gen_ashl3)(rtx, rtx, rtx);
23176 rtx (*gen_shld)(rtx, rtx, rtx);
23177 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23179 rtx low[2], high[2];
23180 int count;
23182 if (CONST_INT_P (operands[2]))
23184 split_double_mode (mode, operands, 2, low, high);
23185 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23187 if (count >= half_width)
23189 emit_move_insn (high[0], low[1]);
23190 emit_move_insn (low[0], const0_rtx);
23192 if (count > half_width)
23193 ix86_expand_ashl_const (high[0], count - half_width, mode);
23195 else
23197 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23199 if (!rtx_equal_p (operands[0], operands[1]))
23200 emit_move_insn (operands[0], operands[1]);
23202 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23203 ix86_expand_ashl_const (low[0], count, mode);
23205 return;
23208 split_double_mode (mode, operands, 1, low, high);
23210 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23212 if (operands[1] == const1_rtx)
23214 /* Assuming we've chosen QImode-capable registers, then 1 << N
23215 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23216 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23218 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23220 ix86_expand_clear (low[0]);
23221 ix86_expand_clear (high[0]);
23222 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23224 d = gen_lowpart (QImode, low[0]);
23225 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23226 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23227 emit_insn (gen_rtx_SET (d, s));
23229 d = gen_lowpart (QImode, high[0]);
23230 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23231 s = gen_rtx_NE (QImode, flags, const0_rtx);
23232 emit_insn (gen_rtx_SET (d, s));
23235 /* Otherwise, we can get the same results by manually performing
23236 a bit extract operation on bit 5/6, and then performing the two
23237 shifts. The two methods of getting 0/1 into low/high are exactly
23238 the same size. Avoiding the shift in the bit extract case helps
23239 pentium4 a bit; no one else seems to care much either way. */
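/* Scalar sketch of the bit-extract variant for a 64-bit "1 << n" built
   from 32-bit halves (variable names are illustrative):

     unsigned int hi = (n >> 5) & 1;   // bit 5 selects the upper half
     unsigned int lo = hi ^ 1;
     lo <<= (n & 31);                  // the hardware masks the count
     hi <<= (n & 31);

   For a 128-bit shift built from 64-bit halves, bit 6 is tested instead.  */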
23240 else
23242 machine_mode half_mode;
23243 rtx (*gen_lshr3)(rtx, rtx, rtx);
23244 rtx (*gen_and3)(rtx, rtx, rtx);
23245 rtx (*gen_xor3)(rtx, rtx, rtx);
23246 HOST_WIDE_INT bits;
23247 rtx x;
23249 if (mode == DImode)
23251 half_mode = SImode;
23252 gen_lshr3 = gen_lshrsi3;
23253 gen_and3 = gen_andsi3;
23254 gen_xor3 = gen_xorsi3;
23255 bits = 5;
23257 else
23259 half_mode = DImode;
23260 gen_lshr3 = gen_lshrdi3;
23261 gen_and3 = gen_anddi3;
23262 gen_xor3 = gen_xordi3;
23263 bits = 6;
23266 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23267 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23268 else
23269 x = gen_lowpart (half_mode, operands[2]);
23270 emit_insn (gen_rtx_SET (high[0], x));
23272 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23273 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23274 emit_move_insn (low[0], high[0]);
23275 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23278 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23279 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23280 return;
23283 if (operands[1] == constm1_rtx)
23285 /* For -1 << N, we can avoid the shld instruction, because we
23286 know that we're shifting 0...31/63 ones into a -1. */
23287 emit_move_insn (low[0], constm1_rtx);
23288 if (optimize_insn_for_size_p ())
23289 emit_move_insn (high[0], low[0]);
23290 else
23291 emit_move_insn (high[0], constm1_rtx);
23293 else
23295 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23297 if (!rtx_equal_p (operands[0], operands[1]))
23298 emit_move_insn (operands[0], operands[1]);
23300 split_double_mode (mode, operands, 1, low, high);
23301 emit_insn (gen_shld (high[0], low[0], operands[2]));
23304 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23306 if (TARGET_CMOVE && scratch)
23308 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23309 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23311 ix86_expand_clear (scratch);
23312 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23314 else
23316 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23317 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23319 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23323 void
23324 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23326 rtx (*gen_ashr3)(rtx, rtx, rtx)
23327 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23328 rtx (*gen_shrd)(rtx, rtx, rtx);
23329 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23331 rtx low[2], high[2];
23332 int count;
23334 if (CONST_INT_P (operands[2]))
23336 split_double_mode (mode, operands, 2, low, high);
23337 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23339 if (count == GET_MODE_BITSIZE (mode) - 1)
23341 emit_move_insn (high[0], high[1]);
23342 emit_insn (gen_ashr3 (high[0], high[0],
23343 GEN_INT (half_width - 1)));
23344 emit_move_insn (low[0], high[0]);
23347 else if (count >= half_width)
23349 emit_move_insn (low[0], high[1]);
23350 emit_move_insn (high[0], low[0]);
23351 emit_insn (gen_ashr3 (high[0], high[0],
23352 GEN_INT (half_width - 1)));
23354 if (count > half_width)
23355 emit_insn (gen_ashr3 (low[0], low[0],
23356 GEN_INT (count - half_width)));
23358 else
23360 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23362 if (!rtx_equal_p (operands[0], operands[1]))
23363 emit_move_insn (operands[0], operands[1]);
23365 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23366 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23369 else
23371 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23373 if (!rtx_equal_p (operands[0], operands[1]))
23374 emit_move_insn (operands[0], operands[1]);
23376 split_double_mode (mode, operands, 1, low, high);
23378 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23379 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23381 if (TARGET_CMOVE && scratch)
23383 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23384 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23386 emit_move_insn (scratch, high[0]);
23387 emit_insn (gen_ashr3 (scratch, scratch,
23388 GEN_INT (half_width - 1)));
23389 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23390 scratch));
23392 else
23394 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23395 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23397 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23402 void
23403 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23405 rtx (*gen_lshr3)(rtx, rtx, rtx)
23406 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23407 rtx (*gen_shrd)(rtx, rtx, rtx);
23408 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23410 rtx low[2], high[2];
23411 int count;
23413 if (CONST_INT_P (operands[2]))
23415 split_double_mode (mode, operands, 2, low, high);
23416 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23418 if (count >= half_width)
23420 emit_move_insn (low[0], high[1]);
23421 ix86_expand_clear (high[0]);
23423 if (count > half_width)
23424 emit_insn (gen_lshr3 (low[0], low[0],
23425 GEN_INT (count - half_width)));
23427 else
23429 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23431 if (!rtx_equal_p (operands[0], operands[1]))
23432 emit_move_insn (operands[0], operands[1]);
23434 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23435 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23438 else
23440 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23442 if (!rtx_equal_p (operands[0], operands[1]))
23443 emit_move_insn (operands[0], operands[1]);
23445 split_double_mode (mode, operands, 1, low, high);
23447 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23448 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23450 if (TARGET_CMOVE && scratch)
23452 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23453 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23455 ix86_expand_clear (scratch);
23456 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23457 scratch));
23459 else
23461 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23462 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23464 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23469 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
23470 static void
23471 predict_jump (int prob)
23473 rtx insn = get_last_insn ();
23474 gcc_assert (JUMP_P (insn));
23475 add_int_reg_note (insn, REG_BR_PROB, prob);
23478 /* Helper function for the string operations below. Test whether VARIABLE
23479 is aligned to VALUE bytes. If so, jump to the label. */
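/* In C terms the emitted test is simply (hypothetical sketch):

     if (!(variable & value))	// e.g. VALUE == 4 tests one bit of a count
       goto label;

   so callers place a move of VALUE bytes between this call and the label,
   and that move is skipped when the tested bit is clear.  */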
23480 static rtx_code_label *
23481 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23483 rtx_code_label *label = gen_label_rtx ();
23484 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23485 if (GET_MODE (variable) == DImode)
23486 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23487 else
23488 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23489 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23490 1, label);
23491 if (epilogue)
23492 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23493 else
23494 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23495 return label;
23498 /* Decrease COUNTREG by VALUE. */
23499 static void
23500 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23502 rtx (*gen_add)(rtx, rtx, rtx)
23503 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23505 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23508 /* Zero extend possibly SImode EXP to Pmode register. */
23510 ix86_zero_extend_to_Pmode (rtx exp)
23512 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23515 /* Divide COUNTREG by SCALE. */
23516 static rtx
23517 scale_counter (rtx countreg, int scale)
23519 rtx sc;
23521 if (scale == 1)
23522 return countreg;
23523 if (CONST_INT_P (countreg))
23524 return GEN_INT (INTVAL (countreg) / scale);
23525 gcc_assert (REG_P (countreg));
23527 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23528 GEN_INT (exact_log2 (scale)),
23529 NULL, 1, OPTAB_DIRECT);
23530 return sc;
23533 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23534 DImode for constant loop counts. */
23536 static machine_mode
23537 counter_mode (rtx count_exp)
23539 if (GET_MODE (count_exp) != VOIDmode)
23540 return GET_MODE (count_exp);
23541 if (!CONST_INT_P (count_exp))
23542 return Pmode;
23543 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23544 return DImode;
23545 return SImode;
23548 /* Copy the address to a Pmode register. This is used for x32 to
23549 truncate DImode TLS address to a SImode register. */
23551 static rtx
23552 ix86_copy_addr_to_reg (rtx addr)
23554 rtx reg;
23555 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23557 reg = copy_addr_to_reg (addr);
23558 REG_POINTER (reg) = 1;
23559 return reg;
23561 else
23563 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23564 reg = copy_to_mode_reg (DImode, addr);
23565 REG_POINTER (reg) = 1;
23566 return gen_rtx_SUBREG (SImode, reg, 0);
23570 /* When ISSETMEM is FALSE, output a simple loop to move the memory pointed to
23571 by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall
23572 size is COUNT, specified in bytes. When ISSETMEM is TRUE, output the
23573 equivalent loop to set memory by VALUE (supposed to be in MODE).
23575 The size is rounded down to a whole number of chunks moved at once.
23576 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
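/* The emitted control flow corresponds roughly to this C sketch (names are
   illustrative, not actual variables):

     size = count & ~(piece - 1);	// piece = GET_MODE_SIZE (mode) * unroll
     if (size == 0)
       goto out;			// this guard is emitted only for byte-sized pieces
     iter = 0;
     do
       {
         copy (or store) one piece at destptr + iter [, srcptr + iter];
         iter += piece;
       }
     while (iter < size);
     destptr += iter;
     srcptr += iter;			// memcpy case only
   out:;
  */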
23579 static void
23580 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23581 rtx destptr, rtx srcptr, rtx value,
23582 rtx count, machine_mode mode, int unroll,
23583 int expected_size, bool issetmem)
23585 rtx_code_label *out_label, *top_label;
23586 rtx iter, tmp;
23587 machine_mode iter_mode = counter_mode (count);
23588 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23589 rtx piece_size = GEN_INT (piece_size_n);
23590 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23591 rtx size;
23592 int i;
23594 top_label = gen_label_rtx ();
23595 out_label = gen_label_rtx ();
23596 iter = gen_reg_rtx (iter_mode);
23598 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23599 NULL, 1, OPTAB_DIRECT);
23600 /* Those two should combine. */
23601 if (piece_size == const1_rtx)
23603 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23604 true, out_label);
23605 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23607 emit_move_insn (iter, const0_rtx);
23609 emit_label (top_label);
23611 tmp = convert_modes (Pmode, iter_mode, iter, true);
23613 /* This assert could be relaxed - in this case we'll need to compute
23614 smallest power of two, containing in PIECE_SIZE_N and pass it to
23615 offset_address. */
23616 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23617 destmem = offset_address (destmem, tmp, piece_size_n);
23618 destmem = adjust_address (destmem, mode, 0);
23620 if (!issetmem)
23622 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23623 srcmem = adjust_address (srcmem, mode, 0);
23625 /* When unrolling for chips that reorder memory reads and writes,
23626 we can save registers by using a single temporary.
23627 Also, using 4 temporaries is overkill in 32-bit mode. */
23628 if (!TARGET_64BIT && 0)
23630 for (i = 0; i < unroll; i++)
23632 if (i)
23634 destmem =
23635 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23636 srcmem =
23637 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23639 emit_move_insn (destmem, srcmem);
23642 else
23644 rtx tmpreg[4];
23645 gcc_assert (unroll <= 4);
23646 for (i = 0; i < unroll; i++)
23648 tmpreg[i] = gen_reg_rtx (mode);
23649 if (i)
23651 srcmem =
23652 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23654 emit_move_insn (tmpreg[i], srcmem);
23656 for (i = 0; i < unroll; i++)
23658 if (i)
23660 destmem =
23661 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23663 emit_move_insn (destmem, tmpreg[i]);
23667 else
23668 for (i = 0; i < unroll; i++)
23670 if (i)
23671 destmem =
23672 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23673 emit_move_insn (destmem, value);
23676 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23677 true, OPTAB_LIB_WIDEN);
23678 if (tmp != iter)
23679 emit_move_insn (iter, tmp);
23681 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23682 true, top_label);
23683 if (expected_size != -1)
23685 expected_size /= GET_MODE_SIZE (mode) * unroll;
23686 if (expected_size == 0)
23687 predict_jump (0);
23688 else if (expected_size > REG_BR_PROB_BASE)
23689 predict_jump (REG_BR_PROB_BASE - 1);
23690 else
23691 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23693 else
23694 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23695 iter = ix86_zero_extend_to_Pmode (iter);
23696 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23697 true, OPTAB_LIB_WIDEN);
23698 if (tmp != destptr)
23699 emit_move_insn (destptr, tmp);
23700 if (!issetmem)
23702 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23703 true, OPTAB_LIB_WIDEN);
23704 if (tmp != srcptr)
23705 emit_move_insn (srcptr, tmp);
23707 emit_label (out_label);
23710 /* Output a "rep; mov" or "rep; stos" instruction depending on the ISSETMEM argument.
23711 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23712 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23713 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23714 ORIG_VALUE is the original value passed to memset to fill the memory with.
23715 Other arguments have the same meaning as for the previous function. */
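/* As a byte-level model (sketch only, not the actual insn), the SImode
   memcpy case behaves like

     n = count / 4;				// scale_counter
     while (n--)
       { *(uint32_t *) d = *(uint32_t *) s; d += 4; s += 4; }

   i.e. one "rep movsd" with the count register pre-divided by the chunk
   size; any tail bytes are left for the epilogue code.  */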
23717 static void
23718 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23719 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23720 rtx count,
23721 machine_mode mode, bool issetmem)
23723 rtx destexp;
23724 rtx srcexp;
23725 rtx countreg;
23726 HOST_WIDE_INT rounded_count;
23728 /* If possible, it is shorter to use rep movs.
23729 TODO: Maybe it is better to move this logic to decide_alg. */
23730 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23731 && (!issetmem || orig_value == const0_rtx))
23732 mode = SImode;
23734 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23735 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23737 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23738 GET_MODE_SIZE (mode)));
23739 if (mode != QImode)
23741 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23742 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23743 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23745 else
23746 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23747 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23749 rounded_count = (INTVAL (count)
23750 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23751 destmem = shallow_copy_rtx (destmem);
23752 set_mem_size (destmem, rounded_count);
23754 else if (MEM_SIZE_KNOWN_P (destmem))
23755 clear_mem_size (destmem);
23757 if (issetmem)
23759 value = force_reg (mode, gen_lowpart (mode, value));
23760 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23762 else
23764 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23765 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23766 if (mode != QImode)
23768 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23769 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23770 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23772 else
23773 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23774 if (CONST_INT_P (count))
23776 rounded_count = (INTVAL (count)
23777 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23778 srcmem = shallow_copy_rtx (srcmem);
23779 set_mem_size (srcmem, rounded_count);
23781 else
23783 if (MEM_SIZE_KNOWN_P (srcmem))
23784 clear_mem_size (srcmem);
23786 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23787 destexp, srcexp));
23791 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23792 DESTMEM.
23793 SRCMEM is passed by pointer so it can be updated on return.
23794 The return value is the updated DESTMEM. */
23795 static rtx
23796 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23797 HOST_WIDE_INT size_to_move)
23799 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23800 enum insn_code code;
23801 machine_mode move_mode;
23802 int piece_size, i;
23804 /* Find the widest mode in which we could perform moves.
23805 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23806 it until move of such size is supported. */
23807 piece_size = 1 << floor_log2 (size_to_move);
23808 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23809 code = optab_handler (mov_optab, move_mode);
23810 while (code == CODE_FOR_nothing && piece_size > 1)
23812 piece_size >>= 1;
23813 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23814 code = optab_handler (mov_optab, move_mode);
23817 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23818 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23819 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23821 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23822 move_mode = mode_for_vector (word_mode, nunits);
23823 code = optab_handler (mov_optab, move_mode);
23824 if (code == CODE_FOR_nothing)
23826 move_mode = word_mode;
23827 piece_size = GET_MODE_SIZE (move_mode);
23828 code = optab_handler (mov_optab, move_mode);
23831 gcc_assert (code != CODE_FOR_nothing);
23833 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23834 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23836 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23837 gcc_assert (size_to_move % piece_size == 0);
23838 adjust = GEN_INT (piece_size);
23839 for (i = 0; i < size_to_move; i += piece_size)
23841 /* We move from memory to memory, so we'll need to do it via
23842 a temporary register. */
23843 tempreg = gen_reg_rtx (move_mode);
23844 emit_insn (GEN_FCN (code) (tempreg, src));
23845 emit_insn (GEN_FCN (code) (dst, tempreg));
23847 emit_move_insn (destptr,
23848 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23849 emit_move_insn (srcptr,
23850 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23852 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23853 piece_size);
23854 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23855 piece_size);
23858 /* Update DST and SRC rtx. */
23859 *srcmem = src;
23860 return dst;
23863 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
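/* Worked example for a constant count: with max_size == 8 and a remainder
   of 7 the code below walks the set bits of the remainder, conceptually

     for (i = 8; i >= 1; i >>= 1)
       if ((count % 8) & i)
         copy i bytes;			// here: 4, then 2, then 1

   so a known count needs no branches at all.  */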
23864 static void
23865 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23866 rtx destptr, rtx srcptr, rtx count, int max_size)
23868 rtx src, dest;
23869 if (CONST_INT_P (count))
23871 HOST_WIDE_INT countval = INTVAL (count);
23872 HOST_WIDE_INT epilogue_size = countval % max_size;
23873 int i;
23875 /* For now MAX_SIZE should be a power of 2. This assert could be
23876 relaxed, but it'll require a bit more complicated epilogue
23877 expanding. */
23878 gcc_assert ((max_size & (max_size - 1)) == 0);
23879 for (i = max_size; i >= 1; i >>= 1)
23881 if (epilogue_size & i)
23882 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23884 return;
23886 if (max_size > 8)
23888 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23889 count, 1, OPTAB_DIRECT);
23890 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23891 count, QImode, 1, 4, false);
23892 return;
23895 /* When there are stringops, we can cheaply increase dest and src pointers.
23896 Otherwise we save code size by maintaining offset (zero is readily
23897 available from the preceding rep operation) and using x86 addressing modes. */
23899 if (TARGET_SINGLE_STRINGOP)
23901 if (max_size > 4)
23903 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23904 src = change_address (srcmem, SImode, srcptr);
23905 dest = change_address (destmem, SImode, destptr);
23906 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23907 emit_label (label);
23908 LABEL_NUSES (label) = 1;
23910 if (max_size > 2)
23912 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23913 src = change_address (srcmem, HImode, srcptr);
23914 dest = change_address (destmem, HImode, destptr);
23915 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23916 emit_label (label);
23917 LABEL_NUSES (label) = 1;
23919 if (max_size > 1)
23921 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23922 src = change_address (srcmem, QImode, srcptr);
23923 dest = change_address (destmem, QImode, destptr);
23924 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23925 emit_label (label);
23926 LABEL_NUSES (label) = 1;
23929 else
23931 rtx offset = force_reg (Pmode, const0_rtx);
23932 rtx tmp;
23934 if (max_size > 4)
23936 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23937 src = change_address (srcmem, SImode, srcptr);
23938 dest = change_address (destmem, SImode, destptr);
23939 emit_move_insn (dest, src);
23940 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23941 true, OPTAB_LIB_WIDEN);
23942 if (tmp != offset)
23943 emit_move_insn (offset, tmp);
23944 emit_label (label);
23945 LABEL_NUSES (label) = 1;
23947 if (max_size > 2)
23949 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23950 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23951 src = change_address (srcmem, HImode, tmp);
23952 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23953 dest = change_address (destmem, HImode, tmp);
23954 emit_move_insn (dest, src);
23955 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23956 true, OPTAB_LIB_WIDEN);
23957 if (tmp != offset)
23958 emit_move_insn (offset, tmp);
23959 emit_label (label);
23960 LABEL_NUSES (label) = 1;
23962 if (max_size > 1)
23964 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23965 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23966 src = change_address (srcmem, QImode, tmp);
23967 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23968 dest = change_address (destmem, QImode, tmp);
23969 emit_move_insn (dest, src);
23970 emit_label (label);
23971 LABEL_NUSES (label) = 1;
23976 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23977 with value PROMOTED_VAL.
23978 The return value is the updated DESTMEM. */
23980 static rtx
23981 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23982 HOST_WIDE_INT size_to_move)
23984 rtx dst = destmem, adjust;
23985 enum insn_code code;
23986 machine_mode move_mode;
23987 int piece_size, i;
23989 /* Find the widest mode in which we could perform moves.
23990 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23991 it until move of such size is supported. */
23992 move_mode = GET_MODE (promoted_val);
23993 if (move_mode == VOIDmode)
23994 move_mode = QImode;
23995 if (size_to_move < GET_MODE_SIZE (move_mode))
23997 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23998 promoted_val = gen_lowpart (move_mode, promoted_val);
24000 piece_size = GET_MODE_SIZE (move_mode);
24001 code = optab_handler (mov_optab, move_mode);
24002 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
24004 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
24006 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
24007 gcc_assert (size_to_move % piece_size == 0);
24008 adjust = GEN_INT (piece_size);
24009 for (i = 0; i < size_to_move; i += piece_size)
24011 if (piece_size <= GET_MODE_SIZE (word_mode))
24013 emit_insn (gen_strset (destptr, dst, promoted_val));
24014 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24015 piece_size);
24016 continue;
24019 emit_insn (GEN_FCN (code) (dst, promoted_val));
24021 emit_move_insn (destptr,
24022 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
24024 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24025 piece_size);
24028 /* Update DST rtx. */
24029 return dst;
24031 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
24032 static void
24033 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
24034 rtx count, int max_size)
24036 count =
24037 expand_simple_binop (counter_mode (count), AND, count,
24038 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
24039 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
24040 gen_lowpart (QImode, value), count, QImode,
24041 1, max_size / 2, true);
24044 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
24045 static void
24046 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
24047 rtx count, int max_size)
24049 rtx dest;
24051 if (CONST_INT_P (count))
24053 HOST_WIDE_INT countval = INTVAL (count);
24054 HOST_WIDE_INT epilogue_size = countval % max_size;
24055 int i;
24057 /* For now MAX_SIZE should be a power of 2. This assert could be
24058 relaxed, but it'll require a bit more complicated epilogue
24059 expanding. */
24060 gcc_assert ((max_size & (max_size - 1)) == 0);
24061 for (i = max_size; i >= 1; i >>= 1)
24063 if (epilogue_size & i)
24065 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24066 destmem = emit_memset (destmem, destptr, vec_value, i);
24067 else
24068 destmem = emit_memset (destmem, destptr, value, i);
24071 return;
24073 if (max_size > 32)
24075 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
24076 return;
24078 if (max_size > 16)
24080 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
24081 if (TARGET_64BIT)
24083 dest = change_address (destmem, DImode, destptr);
24084 emit_insn (gen_strset (destptr, dest, value));
24085 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
24086 emit_insn (gen_strset (destptr, dest, value));
24088 else
24090 dest = change_address (destmem, SImode, destptr);
24091 emit_insn (gen_strset (destptr, dest, value));
24092 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24093 emit_insn (gen_strset (destptr, dest, value));
24094 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
24095 emit_insn (gen_strset (destptr, dest, value));
24096 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24097 emit_insn (gen_strset (destptr, dest, value));
24099 emit_label (label);
24100 LABEL_NUSES (label) = 1;
24102 if (max_size > 8)
24104 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24105 if (TARGET_64BIT)
24107 dest = change_address (destmem, DImode, destptr);
24108 emit_insn (gen_strset (destptr, dest, value));
24110 else
24112 dest = change_address (destmem, SImode, destptr);
24113 emit_insn (gen_strset (destptr, dest, value));
24114 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24115 emit_insn (gen_strset (destptr, dest, value));
24117 emit_label (label);
24118 LABEL_NUSES (label) = 1;
24120 if (max_size > 4)
24122 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24123 dest = change_address (destmem, SImode, destptr);
24124 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24125 emit_label (label);
24126 LABEL_NUSES (label) = 1;
24128 if (max_size > 2)
24130 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24131 dest = change_address (destmem, HImode, destptr);
24132 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24133 emit_label (label);
24134 LABEL_NUSES (label) = 1;
24136 if (max_size > 1)
24138 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24139 dest = change_address (destmem, QImode, destptr);
24140 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24141 emit_label (label);
24142 LABEL_NUSES (label) = 1;
24146 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
24147 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
24148 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
24149 ignored.
24150 Return value is updated DESTMEM. */
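/* Sketch of the effect for DESIRED_ALIGNMENT == 8 and ALIGN == 1 (memcpy
   case shown; memset is analogous):

     if (dst & 1) { copy 1 byte;  dst += 1; count -= 1; }
     if (dst & 2) { copy 2 bytes; dst += 2; count -= 2; }
     if (dst & 4) { copy 4 bytes; dst += 4; count -= 4; }

   after which DESTPTR is 8-byte aligned and MEM_ALIGN on DESTMEM is bumped
   accordingly.  */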
24151 static rtx
24152 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24153 rtx destptr, rtx srcptr, rtx value,
24154 rtx vec_value, rtx count, int align,
24155 int desired_alignment, bool issetmem)
24157 int i;
24158 for (i = 1; i < desired_alignment; i <<= 1)
24160 if (align <= i)
24162 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24163 if (issetmem)
24165 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24166 destmem = emit_memset (destmem, destptr, vec_value, i);
24167 else
24168 destmem = emit_memset (destmem, destptr, value, i);
24170 else
24171 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24172 ix86_adjust_counter (count, i);
24173 emit_label (label);
24174 LABEL_NUSES (label) = 1;
24175 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24178 return destmem;
24181 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
24182 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24183 and jump to DONE_LABEL. */
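/* The underlying trick, as a plain-C sketch: a block of n bytes with
   SIZE <= n < 2*SIZE can be handled branch-free by two possibly
   overlapping copies,

     memcpy (dst, src, SIZE);				// head
     memcpy (dst + n - SIZE, src + n - SIZE, SIZE);	// tail, may overlap

   which is what the head/tail moves below implement for both the memcpy
   and the memset variants.  */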
24184 static void
24185 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24186 rtx destptr, rtx srcptr,
24187 rtx value, rtx vec_value,
24188 rtx count, int size,
24189 rtx done_label, bool issetmem)
24191 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24192 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24193 rtx modesize;
24194 int n;
24196 /* If we do not have a vector value to copy, we must reduce the size. */
24197 if (issetmem)
24199 if (!vec_value)
24201 if (GET_MODE (value) == VOIDmode && size > 8)
24202 mode = Pmode;
24203 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24204 mode = GET_MODE (value);
24206 else
24207 mode = GET_MODE (vec_value), value = vec_value;
24209 else
24211 /* Choose appropriate vector mode. */
24212 if (size >= 32)
24213 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24214 else if (size >= 16)
24215 mode = TARGET_SSE ? V16QImode : DImode;
24216 srcmem = change_address (srcmem, mode, srcptr);
24218 destmem = change_address (destmem, mode, destptr);
24219 modesize = GEN_INT (GET_MODE_SIZE (mode));
24220 gcc_assert (GET_MODE_SIZE (mode) <= size);
24221 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24223 if (issetmem)
24224 emit_move_insn (destmem, gen_lowpart (mode, value));
24225 else
24227 emit_move_insn (destmem, srcmem);
24228 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24230 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24233 destmem = offset_address (destmem, count, 1);
24234 destmem = offset_address (destmem, GEN_INT (-2 * size),
24235 GET_MODE_SIZE (mode));
24236 if (!issetmem)
24238 srcmem = offset_address (srcmem, count, 1);
24239 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24240 GET_MODE_SIZE (mode));
24242 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24244 if (issetmem)
24245 emit_move_insn (destmem, gen_lowpart (mode, value));
24246 else
24248 emit_move_insn (destmem, srcmem);
24249 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24251 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24253 emit_jump_insn (gen_jump (done_label));
24254 emit_barrier ();
24256 emit_label (label);
24257 LABEL_NUSES (label) = 1;
24260 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24261 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24262 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
24263 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24264 DONE_LABEL is a label after the whole copying sequence. The label is created
24265 on demand if *DONE_LABEL is NULL.
24266 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
24267 bounds after the initial copies.
24269 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24270 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
24271 we will dispatch to a library call for large blocks.
24273 In pseudocode we do:
24275 if (COUNT < SIZE)
24277 Assume that SIZE is 4. Bigger sizes are handled analogously
24278 if (COUNT & 4)
24280 copy 4 bytes from SRCPTR to DESTPTR
24281 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24282 goto done_label
24284 if (!COUNT)
24285 goto done_label;
24286 copy 1 byte from SRCPTR to DESTPTR
24287 if (COUNT & 2)
24289 copy 2 bytes from SRCPTR to DESTPTR
24290 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24293 else
24295 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24296 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24298 OLD_DESTPTR = DESTPTR;
24299 Align DESTPTR up to DESIRED_ALIGN
24300 SRCPTR += DESTPTR - OLD_DESTPTR
24301 COUNT -= DESTPTR - OLD_DESTPTR
24302 if (DYNAMIC_CHECK)
24303 Round COUNT down to multiple of SIZE
24304 << optional caller supplied zero size guard is here >>
24305 << optional caller supplied dynamic check is here >>
24306 << caller supplied main copy loop is here >>
24308 done_label:
   */
24310 static void
24311 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24312 rtx *destptr, rtx *srcptr,
24313 machine_mode mode,
24314 rtx value, rtx vec_value,
24315 rtx *count,
24316 rtx_code_label **done_label,
24317 int size,
24318 int desired_align,
24319 int align,
24320 unsigned HOST_WIDE_INT *min_size,
24321 bool dynamic_check,
24322 bool issetmem)
24324 rtx_code_label *loop_label = NULL, *label;
24325 int n;
24326 rtx modesize;
24327 int prolog_size = 0;
24328 rtx mode_value;
24330 /* Choose the proper value to copy. */
24331 if (issetmem && VECTOR_MODE_P (mode))
24332 mode_value = vec_value;
24333 else
24334 mode_value = value;
24335 gcc_assert (GET_MODE_SIZE (mode) <= size);
24337 /* See if block is big or small, handle small blocks. */
24338 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24340 int size2 = size;
24341 loop_label = gen_label_rtx ();
24343 if (!*done_label)
24344 *done_label = gen_label_rtx ();
24346 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24347 1, loop_label);
24348 size2 >>= 1;
24350 /* Handle sizes > 3. */
24351 for (;size2 > 2; size2 >>= 1)
24352 expand_small_movmem_or_setmem (destmem, srcmem,
24353 *destptr, *srcptr,
24354 value, vec_value,
24355 *count,
24356 size2, *done_label, issetmem);
24357 /* Nothing to copy? Jump to DONE_LABEL if so */
24358 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24359 1, *done_label);
24361 /* Do a byte copy. */
24362 destmem = change_address (destmem, QImode, *destptr);
24363 if (issetmem)
24364 emit_move_insn (destmem, gen_lowpart (QImode, value));
24365 else
24367 srcmem = change_address (srcmem, QImode, *srcptr);
24368 emit_move_insn (destmem, srcmem);
24371 /* Handle sizes 2 and 3. */
24372 label = ix86_expand_aligntest (*count, 2, false);
24373 destmem = change_address (destmem, HImode, *destptr);
24374 destmem = offset_address (destmem, *count, 1);
24375 destmem = offset_address (destmem, GEN_INT (-2), 2);
24376 if (issetmem)
24377 emit_move_insn (destmem, gen_lowpart (HImode, value));
24378 else
24380 srcmem = change_address (srcmem, HImode, *srcptr);
24381 srcmem = offset_address (srcmem, *count, 1);
24382 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24383 emit_move_insn (destmem, srcmem);
24386 emit_label (label);
24387 LABEL_NUSES (label) = 1;
24388 emit_jump_insn (gen_jump (*done_label));
24389 emit_barrier ();
24391 else
24392 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24393 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24395 /* Start memcpy for COUNT >= SIZE. */
24396 if (loop_label)
24398 emit_label (loop_label);
24399 LABEL_NUSES (loop_label) = 1;
24402 /* Copy first desired_align bytes. */
24403 if (!issetmem)
24404 srcmem = change_address (srcmem, mode, *srcptr);
24405 destmem = change_address (destmem, mode, *destptr);
24406 modesize = GEN_INT (GET_MODE_SIZE (mode));
24407 for (n = 0; prolog_size < desired_align - align; n++)
24409 if (issetmem)
24410 emit_move_insn (destmem, mode_value);
24411 else
24413 emit_move_insn (destmem, srcmem);
24414 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24416 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24417 prolog_size += GET_MODE_SIZE (mode);
24421 /* Copy last SIZE bytes. */
24422 destmem = offset_address (destmem, *count, 1);
24423 destmem = offset_address (destmem,
24424 GEN_INT (-size - prolog_size),
24426 if (issetmem)
24427 emit_move_insn (destmem, mode_value);
24428 else
24430 srcmem = offset_address (srcmem, *count, 1);
24431 srcmem = offset_address (srcmem,
24432 GEN_INT (-size - prolog_size),
24434 emit_move_insn (destmem, srcmem);
24436 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24438 destmem = offset_address (destmem, modesize, 1);
24439 if (issetmem)
24440 emit_move_insn (destmem, mode_value);
24441 else
24443 srcmem = offset_address (srcmem, modesize, 1);
24444 emit_move_insn (destmem, srcmem);
24448 /* Align destination. */
24449 if (desired_align > 1 && desired_align > align)
24451 rtx saveddest = *destptr;
24453 gcc_assert (desired_align <= size);
24454 /* Align destptr up, place it to new register. */
24455 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24456 GEN_INT (prolog_size),
24457 NULL_RTX, 1, OPTAB_DIRECT);
24458 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24459 REG_POINTER (*destptr) = 1;
24460 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24461 GEN_INT (-desired_align),
24462 *destptr, 1, OPTAB_DIRECT);
24463 /* See how many bytes we skipped. */
24464 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24465 *destptr,
24466 saveddest, 1, OPTAB_DIRECT);
24467 /* Adjust srcptr and count. */
24468 if (!issetmem)
24469 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24470 saveddest, *srcptr, 1, OPTAB_DIRECT);
24471 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24472 saveddest, *count, 1, OPTAB_DIRECT);
24473 /* We copied at most size + prolog_size. */
24474 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24475 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24476 else
24477 *min_size = 0;
24479 /* Our loops always round down the block size, but for dispatch to a library
24480 call we need the precise value. */
24481 if (dynamic_check)
24482 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24483 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24485 else
24487 gcc_assert (prolog_size == 0);
24488 /* Decrease count, so we won't end up copying last word twice. */
24489 if (!CONST_INT_P (*count))
24490 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24491 constm1_rtx, *count, 1, OPTAB_DIRECT);
24492 else
24493 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24494 if (*min_size)
24495 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24500 /* This function is like the previous one, except here we know how many bytes
24501 need to be copied. That allows us to update alignment not only of DST, which
24502 is returned, but also of SRC, which is passed as a pointer for that
24503 reason. */
24504 static rtx
24505 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24506 rtx srcreg, rtx value, rtx vec_value,
24507 int desired_align, int align_bytes,
24508 bool issetmem)
24510 rtx src = NULL;
24511 rtx orig_dst = dst;
24512 rtx orig_src = NULL;
24513 int piece_size = 1;
24514 int copied_bytes = 0;
24516 if (!issetmem)
24518 gcc_assert (srcp != NULL);
24519 src = *srcp;
24520 orig_src = src;
24523 for (piece_size = 1;
24524 piece_size <= desired_align && copied_bytes < align_bytes;
24525 piece_size <<= 1)
24527 if (align_bytes & piece_size)
24529 if (issetmem)
24531 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24532 dst = emit_memset (dst, destreg, vec_value, piece_size);
24533 else
24534 dst = emit_memset (dst, destreg, value, piece_size);
24536 else
24537 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24538 copied_bytes += piece_size;
24541 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24542 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24543 if (MEM_SIZE_KNOWN_P (orig_dst))
24544 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24546 if (!issetmem)
24548 int src_align_bytes = get_mem_align_offset (src, desired_align
24549 * BITS_PER_UNIT);
24550 if (src_align_bytes >= 0)
24551 src_align_bytes = desired_align - src_align_bytes;
24552 if (src_align_bytes >= 0)
24554 unsigned int src_align;
24555 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24557 if ((src_align_bytes & (src_align - 1))
24558 == (align_bytes & (src_align - 1)))
24559 break;
24561 if (src_align > (unsigned int) desired_align)
24562 src_align = desired_align;
24563 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24564 set_mem_align (src, src_align * BITS_PER_UNIT);
24566 if (MEM_SIZE_KNOWN_P (orig_src))
24567 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24568 *srcp = src;
24571 return dst;
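/* Worked example (illustrative, not part of the original sources): with
   DESIRED_ALIGN == 8 and ALIGN_BYTES == 5, the piece_size loop above emits
   a 1-byte move (bit 0 of 5) followed by a 4-byte move (bit 2 of 5),
   copying exactly the 5 bytes needed to make DST 8-byte aligned while the
   alias and size info of the remaining block stays exact.  */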
24574 /* Return true if ALG can be used in current context.
24575 Assume we expand memset if MEMSET is true. */
24576 static bool
24577 alg_usable_p (enum stringop_alg alg, bool memset)
24579 if (alg == no_stringop)
24580 return false;
24581 if (alg == vector_loop)
24582 return TARGET_SSE || TARGET_AVX;
24583 /* Algorithms using the rep prefix want at least edi and ecx;
24584 additionally, memset wants eax and memcpy wants esi. Don't
24585 consider such algorithms if the user has appropriated those
24586 registers for their own purposes. */
24587 if (alg == rep_prefix_1_byte
24588 || alg == rep_prefix_4_byte
24589 || alg == rep_prefix_8_byte)
24590 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24591 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24592 return true;
24595 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24596 static enum stringop_alg
24597 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24598 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24599 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24601 const struct stringop_algs * algs;
24602 bool optimize_for_speed;
24603 int max = 0;
24604 const struct processor_costs *cost;
24605 int i;
24606 bool any_alg_usable_p = false;
24608 *noalign = false;
24609 *dynamic_check = -1;
24611 /* Even if the string operation call is cold, we still might spend a lot
24612 of time processing large blocks. */
24613 if (optimize_function_for_size_p (cfun)
24614 || (optimize_insn_for_size_p ()
24615 && (max_size < 256
24616 || (expected_size != -1 && expected_size < 256))))
24617 optimize_for_speed = false;
24618 else
24619 optimize_for_speed = true;
24621 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24622 if (memset)
24623 algs = &cost->memset[TARGET_64BIT != 0];
24624 else
24625 algs = &cost->memcpy[TARGET_64BIT != 0];
24627 /* See maximal size for user defined algorithm. */
24628 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24630 enum stringop_alg candidate = algs->size[i].alg;
24631 bool usable = alg_usable_p (candidate, memset);
24632 any_alg_usable_p |= usable;
24634 if (candidate != libcall && candidate && usable)
24635 max = algs->size[i].max;
24638 /* If expected size is not known but max size is small enough
24639 so that the inline version is a win, set the expected size into
24640 the range. */
24641 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24642 && expected_size == -1)
24643 expected_size = min_size / 2 + max_size / 2;
24645 /* If the user specified the algorithm, honor it if possible. */
24646 if (ix86_stringop_alg != no_stringop
24647 && alg_usable_p (ix86_stringop_alg, memset))
24648 return ix86_stringop_alg;
24649 /* rep; movq or rep; movl is the smallest variant. */
24650 else if (!optimize_for_speed)
24652 *noalign = true;
24653 if (!count || (count & 3) || (memset && !zero_memset))
24654 return alg_usable_p (rep_prefix_1_byte, memset)
24655 ? rep_prefix_1_byte : loop_1_byte;
24656 else
24657 return alg_usable_p (rep_prefix_4_byte, memset)
24658 ? rep_prefix_4_byte : loop;
24660 /* Very tiny blocks are best handled via the loop; REP is expensive to
24661 set up. */
24662 else if (expected_size != -1 && expected_size < 4)
24663 return loop_1_byte;
24664 else if (expected_size != -1)
24666 enum stringop_alg alg = libcall;
24667 bool alg_noalign = false;
24668 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24670 /* We get here if the algorithms that were not libcall-based
24671 were rep-prefix based and we are unable to use rep prefixes
24672 based on global register usage. Break out of the loop and
24673 use the heuristic below. */
24674 if (algs->size[i].max == 0)
24675 break;
24676 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24678 enum stringop_alg candidate = algs->size[i].alg;
24680 if (candidate != libcall && alg_usable_p (candidate, memset))
24682 alg = candidate;
24683 alg_noalign = algs->size[i].noalign;
24685 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24686 last non-libcall inline algorithm. */
24687 if (TARGET_INLINE_ALL_STRINGOPS)
24689 /* When the current size is best to be copied by a libcall,
24690 but we are still forced to inline, run the heuristic below
24691 that will pick code for medium sized blocks. */
24692 if (alg != libcall)
24694 *noalign = alg_noalign;
24695 return alg;
24697 else if (!any_alg_usable_p)
24698 break;
24700 else if (alg_usable_p (candidate, memset))
24702 *noalign = algs->size[i].noalign;
24703 return candidate;
24708 /* When asked to inline the call anyway, try to pick a meaningful choice.
24709 We look for the maximal size of block that is faster to copy by hand
24710 and take blocks of at most that size, guessing that the average size
24711 will be roughly half of the block.
24713 If this turns out to be bad, we might simply specify the preferred
24714 choice in ix86_costs. */
24715 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24716 && (algs->unknown_size == libcall
24717 || !alg_usable_p (algs->unknown_size, memset)))
24719 enum stringop_alg alg;
24721 /* If there aren't any usable algorithms, then recursing on
24722 smaller sizes isn't going to find anything. Just return the
24723 simple byte-at-a-time copy loop. */
24724 if (!any_alg_usable_p)
24726 /* Pick something reasonable. */
24727 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24728 *dynamic_check = 128;
24729 return loop_1_byte;
24731 if (max <= 0)
24732 max = 4096;
24733 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24734 zero_memset, dynamic_check, noalign);
24735 gcc_assert (*dynamic_check == -1);
24736 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24737 *dynamic_check = max;
24738 else
24739 gcc_assert (alg != libcall);
24740 return alg;
24742 return (alg_usable_p (algs->unknown_size, memset)
24743 ? algs->unknown_size : libcall);
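/* Worked example (illustrative, with a made-up cost table): given
   size[] = {{128, loop, false}, {-1, rep_prefix_4_byte, false}} and
   EXPECTED_SIZE == 200, the table walk above skips the first entry
   (128 < 200) and, assuming rep prefixes are usable, returns
   rep_prefix_4_byte with *noalign taken from that entry; with
   EXPECTED_SIZE == 64 it would return loop instead.  */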
24746 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24747 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24748 static int
24749 decide_alignment (int align,
24750 enum stringop_alg alg,
24751 int expected_size,
24752 machine_mode move_mode)
24754 int desired_align = 0;
24756 gcc_assert (alg != no_stringop);
24758 if (alg == libcall)
24759 return 0;
24760 if (move_mode == VOIDmode)
24761 return 0;
24763 desired_align = GET_MODE_SIZE (move_mode);
24764 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
24765 copying a whole cache line at once. */
24766 if (TARGET_PENTIUMPRO
24767 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24768 desired_align = 8;
24770 if (optimize_size)
24771 desired_align = 1;
24772 if (desired_align < align)
24773 desired_align = align;
24774 if (expected_size != -1 && expected_size < 4)
24775 desired_align = align;
24777 return desired_align;
24781 /* Helper function for memcpy. For QImode value 0xXY produce
24782 0xXYXYXYXY of the width specified by MODE. This is essentially
24783 a * 0x01010101, but we can do slightly better than
24784 synth_mult by unwinding the sequence by hand on CPUs with
24785 slow multiply. */
24786 static rtx
24787 promote_duplicated_reg (machine_mode mode, rtx val)
24789 machine_mode valmode = GET_MODE (val);
24790 rtx tmp;
24791 int nops = mode == DImode ? 3 : 2;
24793 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24794 if (val == const0_rtx)
24795 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24796 if (CONST_INT_P (val))
24798 HOST_WIDE_INT v = INTVAL (val) & 255;
24800 v |= v << 8;
24801 v |= v << 16;
24802 if (mode == DImode)
24803 v |= (v << 16) << 16;
24804 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24807 if (valmode == VOIDmode)
24808 valmode = QImode;
24809 if (valmode != QImode)
24810 val = gen_lowpart (QImode, val);
24811 if (mode == QImode)
24812 return val;
24813 if (!TARGET_PARTIAL_REG_STALL)
24814 nops--;
24815 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24816 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24817 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24818 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24820 rtx reg = convert_modes (mode, QImode, val, true);
24821 tmp = promote_duplicated_reg (mode, const1_rtx);
24822 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24823 OPTAB_DIRECT);
24825 else
24827 rtx reg = convert_modes (mode, QImode, val, true);
24829 if (!TARGET_PARTIAL_REG_STALL)
24830 if (mode == SImode)
24831 emit_insn (gen_insvsi_1 (reg, reg));
24832 else
24833 emit_insn (gen_insvdi_1 (reg, reg));
24834 else
24836 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24837 NULL, 1, OPTAB_DIRECT);
24838 reg =
24839 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24841 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24842 NULL, 1, OPTAB_DIRECT);
24843 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24844 if (mode == SImode)
24845 return reg;
24846 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24847 NULL, 1, OPTAB_DIRECT);
24848 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24849 return reg;
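/* Worked example (illustrative, not part of the original sources): for
   VAL == 0xAB and MODE == DImode the constant path above computes
   0xAB -> 0xABAB -> 0xABABABAB -> 0xABABABABABABABAB directly, while the
   register path builds the same value as reg |= reg << 8; reg |= reg << 16;
   reg |= reg << 32, or as a single multiply by 0x0101010101010101 when the
   cost model says the multiply is cheaper.  */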
24853 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24854 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24855 alignment from ALIGN to DESIRED_ALIGN. */
24856 static rtx
24857 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24858 int align)
24860 rtx promoted_val;
24862 if (TARGET_64BIT
24863 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24864 promoted_val = promote_duplicated_reg (DImode, val);
24865 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24866 promoted_val = promote_duplicated_reg (SImode, val);
24867 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24868 promoted_val = promote_duplicated_reg (HImode, val);
24869 else
24870 promoted_val = val;
24872 return promoted_val;
24875 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24876 operations when profitable. The code depends upon architecture, block size
24877 and alignment, but always has one of the following overall structures:
24879 Aligned move sequence:
24881 1) Prologue guard: Conditional that jumps up to epilogues for small
24882 blocks that can be handled by epilogue alone. This is faster
24883 but also needed for correctness, since the prologue assumes the block
24884 is larger than the desired alignment.
24886 Optional dynamic check for size and libcall for large
24887 blocks is emitted here too, with -minline-stringops-dynamically.
24889 2) Prologue: copy first few bytes in order to get destination
24890 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24891 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24892 copied. We emit either a jump tree on power of two sized
24893 blocks, or a byte loop.
24895 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24896 with specified algorithm.
24898 4) Epilogue: code copying tail of the block that is too small to be
24899 handled by main body (or up to size guarded by prologue guard).
24901 Misaligned move sequence
24903 1) misaligned move prologue/epilogue containing:
24904 a) Prologue handling small memory blocks and jumping to done_label
24905 (skipped if blocks are known to be large enough)
24906 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24907 needed, done by a single possibly misaligned move
24908 (skipped if alignment is not needed)
24909 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24911 2) Zero size guard dispatching to done_label, if needed
24913 3) dispatch to library call, if needed,
24915 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24916 with specified algorithm. */
24917 bool
24918 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24919 rtx align_exp, rtx expected_align_exp,
24920 rtx expected_size_exp, rtx min_size_exp,
24921 rtx max_size_exp, rtx probable_max_size_exp,
24922 bool issetmem)
24924 rtx destreg;
24925 rtx srcreg = NULL;
24926 rtx_code_label *label = NULL;
24927 rtx tmp;
24928 rtx_code_label *jump_around_label = NULL;
24929 HOST_WIDE_INT align = 1;
24930 unsigned HOST_WIDE_INT count = 0;
24931 HOST_WIDE_INT expected_size = -1;
24932 int size_needed = 0, epilogue_size_needed;
24933 int desired_align = 0, align_bytes = 0;
24934 enum stringop_alg alg;
24935 rtx promoted_val = NULL;
24936 rtx vec_promoted_val = NULL;
24937 bool force_loopy_epilogue = false;
24938 int dynamic_check;
24939 bool need_zero_guard = false;
24940 bool noalign;
24941 machine_mode move_mode = VOIDmode;
24942 int unroll_factor = 1;
24943 /* TODO: Once value ranges are available, fill in proper data. */
24944 unsigned HOST_WIDE_INT min_size = 0;
24945 unsigned HOST_WIDE_INT max_size = -1;
24946 unsigned HOST_WIDE_INT probable_max_size = -1;
24947 bool misaligned_prologue_used = false;
24949 if (CONST_INT_P (align_exp))
24950 align = INTVAL (align_exp);
24951 /* i386 can do misaligned access at a reasonably increased cost. */
24952 if (CONST_INT_P (expected_align_exp)
24953 && INTVAL (expected_align_exp) > align)
24954 align = INTVAL (expected_align_exp);
24955 /* ALIGN is the minimum of destination and source alignment, but we care here
24956 just about destination alignment. */
24957 else if (!issetmem
24958 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24959 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24961 if (CONST_INT_P (count_exp))
24963 min_size = max_size = probable_max_size = count = expected_size
24964 = INTVAL (count_exp);
24965 /* When COUNT is 0, there is nothing to do. */
24966 if (!count)
24967 return true;
24969 else
24971 if (min_size_exp)
24972 min_size = INTVAL (min_size_exp);
24973 if (max_size_exp)
24974 max_size = INTVAL (max_size_exp);
24975 if (probable_max_size_exp)
24976 probable_max_size = INTVAL (probable_max_size_exp);
24977 if (CONST_INT_P (expected_size_exp))
24978 expected_size = INTVAL (expected_size_exp);
24981 /* Make sure we don't need to care about overflow later on. */
24982 if (count > (HOST_WIDE_INT_1U << 30))
24983 return false;
24985 /* Step 0: Decide on preferred algorithm, desired alignment and
24986 size of chunks to be copied by main loop. */
24987 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24988 issetmem,
24989 issetmem && val_exp == const0_rtx,
24990 &dynamic_check, &noalign);
24991 if (alg == libcall)
24992 return false;
24993 gcc_assert (alg != no_stringop);
24995 /* For now the vector version of memset is generated only for memory zeroing, as
24996 creating the promoted vector value is very cheap in this case. */
24997 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24998 alg = unrolled_loop;
25000 if (!count)
25001 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
25002 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
25003 if (!issetmem)
25004 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
25006 unroll_factor = 1;
25007 move_mode = word_mode;
25008 switch (alg)
25010 case libcall:
25011 case no_stringop:
25012 case last_alg:
25013 gcc_unreachable ();
25014 case loop_1_byte:
25015 need_zero_guard = true;
25016 move_mode = QImode;
25017 break;
25018 case loop:
25019 need_zero_guard = true;
25020 break;
25021 case unrolled_loop:
25022 need_zero_guard = true;
25023 unroll_factor = (TARGET_64BIT ? 4 : 2);
25024 break;
25025 case vector_loop:
25026 need_zero_guard = true;
25027 unroll_factor = 4;
25028 /* Find the widest supported mode. */
25029 move_mode = word_mode;
25030 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
25031 != CODE_FOR_nothing)
25032 move_mode = GET_MODE_WIDER_MODE (move_mode);
25034 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25035 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25036 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25038 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25039 move_mode = mode_for_vector (word_mode, nunits);
25040 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
25041 move_mode = word_mode;
25043 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
25044 break;
25045 case rep_prefix_8_byte:
25046 move_mode = DImode;
25047 break;
25048 case rep_prefix_4_byte:
25049 move_mode = SImode;
25050 break;
25051 case rep_prefix_1_byte:
25052 move_mode = QImode;
25053 break;
25055 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
25056 epilogue_size_needed = size_needed;
25058 desired_align = decide_alignment (align, alg, expected_size, move_mode);
25059 if (!TARGET_ALIGN_STRINGOPS || noalign)
25060 align = desired_align;
25062 /* Step 1: Prologue guard. */
25064 /* Alignment code needs count to be in register. */
25065 if (CONST_INT_P (count_exp) && desired_align > align)
25067 if (INTVAL (count_exp) > desired_align
25068 && INTVAL (count_exp) > size_needed)
25070 align_bytes
25071 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
25072 if (align_bytes <= 0)
25073 align_bytes = 0;
25074 else
25075 align_bytes = desired_align - align_bytes;
25077 if (align_bytes == 0)
25078 count_exp = force_reg (counter_mode (count_exp), count_exp);
25080 gcc_assert (desired_align >= 1 && align >= 1);
25082 /* Misaligned move sequences handle both prologue and epilogue at once.
25083 Default code generation results in smaller code for large alignments
25084 and also avoids redundant work when sizes are known precisely. */
25085 misaligned_prologue_used
25086 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
25087 && MAX (desired_align, epilogue_size_needed) <= 32
25088 && desired_align <= epilogue_size_needed
25089 && ((desired_align > align && !align_bytes)
25090 || (!count && epilogue_size_needed > 1)));
25092 /* Do the cheap promotion to allow better CSE across the
25093 main loop and epilogue (i.e. one load of the big constant in
25094 front of all code).
25095 For now the misaligned move sequences do not have a fast path
25096 without broadcasting. */
25097 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25099 if (alg == vector_loop)
25101 gcc_assert (val_exp == const0_rtx);
25102 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25103 promoted_val = promote_duplicated_reg_to_size (val_exp,
25104 GET_MODE_SIZE (word_mode),
25105 desired_align, align);
25107 else
25109 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25110 desired_align, align);
25113 /* Misaligned move sequences handle both prologues and epilogues at once.
25114 Default code generation results in smaller code for large alignments and
25115 also avoids redundant work when sizes are known precisely. */
25116 if (misaligned_prologue_used)
25119 /* Misaligned move prologue handles small blocks by itself. */
25119 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25120 (dst, src, &destreg, &srcreg,
25121 move_mode, promoted_val, vec_promoted_val,
25122 &count_exp,
25123 &jump_around_label,
25124 desired_align < align
25125 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25126 desired_align, align, &min_size, dynamic_check, issetmem);
25127 if (!issetmem)
25128 src = change_address (src, BLKmode, srcreg);
25129 dst = change_address (dst, BLKmode, destreg);
25130 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25131 epilogue_size_needed = 0;
25132 if (need_zero_guard
25133 && min_size < (unsigned HOST_WIDE_INT) size_needed)
25135 /* It is possible that we copied enough so the main loop will not
25136 execute. */
25137 gcc_assert (size_needed > 1);
25138 if (jump_around_label == NULL_RTX)
25139 jump_around_label = gen_label_rtx ();
25140 emit_cmp_and_jump_insns (count_exp,
25141 GEN_INT (size_needed),
25142 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25143 if (expected_size == -1
25144 || expected_size < (desired_align - align) / 2 + size_needed)
25145 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25146 else
25147 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25150 /* Ensure that alignment prologue won't copy past end of block. */
25151 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25153 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25154 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25155 Make sure it is power of 2. */
25156 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
25158 /* To improve performance of small blocks, we jump around the VAL
25159 promoting code. This means that if the promoted VAL is not constant,
25160 we might not use it in the epilogue and have to use the byte
25161 loop variant. */
25162 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25163 force_loopy_epilogue = true;
25164 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25165 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25167 /* If main algorithm works on QImode, no epilogue is needed.
25168 For small sizes just don't align anything. */
25169 if (size_needed == 1)
25170 desired_align = align;
25171 else
25172 goto epilogue;
25174 else if (!count
25175 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25177 label = gen_label_rtx ();
25178 emit_cmp_and_jump_insns (count_exp,
25179 GEN_INT (epilogue_size_needed),
25180 LTU, 0, counter_mode (count_exp), 1, label);
25181 if (expected_size == -1 || expected_size < epilogue_size_needed)
25182 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25183 else
25184 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25188 /* Emit code to decide on runtime whether library call or inline should be
25189 used. */
25190 if (dynamic_check != -1)
25192 if (!issetmem && CONST_INT_P (count_exp))
25194 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25196 emit_block_move_via_libcall (dst, src, count_exp, false);
25197 count_exp = const0_rtx;
25198 goto epilogue;
25201 else
25203 rtx_code_label *hot_label = gen_label_rtx ();
25204 if (jump_around_label == NULL_RTX)
25205 jump_around_label = gen_label_rtx ();
25206 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25207 LEU, 0, counter_mode (count_exp),
25208 1, hot_label);
25209 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25210 if (issetmem)
25211 set_storage_via_libcall (dst, count_exp, val_exp, false);
25212 else
25213 emit_block_move_via_libcall (dst, src, count_exp, false);
25214 emit_jump (jump_around_label);
25215 emit_label (hot_label);
25219 /* Step 2: Alignment prologue. */
25220 /* Do the expensive promotion once we branched off the small blocks. */
25221 if (issetmem && !promoted_val)
25222 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25223 desired_align, align);
25225 if (desired_align > align && !misaligned_prologue_used)
25227 if (align_bytes == 0)
25229 /* Except for the first move in the prologue, we no longer know
25230 the constant offset in the aliasing info. It doesn't seem worth
25231 the pain to maintain it for the first move, so throw away
25232 the info early. */
25233 dst = change_address (dst, BLKmode, destreg);
25234 if (!issetmem)
25235 src = change_address (src, BLKmode, srcreg);
25236 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25237 promoted_val, vec_promoted_val,
25238 count_exp, align, desired_align,
25239 issetmem);
25240 /* At most desired_align - align bytes are copied. */
25241 if (min_size < (unsigned)(desired_align - align))
25242 min_size = 0;
25243 else
25244 min_size -= desired_align - align;
25246 else
25248 /* If we know how many bytes need to be stored before dst is
25249 sufficiently aligned, maintain aliasing info accurately. */
25250 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25251 srcreg,
25252 promoted_val,
25253 vec_promoted_val,
25254 desired_align,
25255 align_bytes,
25256 issetmem);
25258 count_exp = plus_constant (counter_mode (count_exp),
25259 count_exp, -align_bytes);
25260 count -= align_bytes;
25261 min_size -= align_bytes;
25262 max_size -= align_bytes;
25264 if (need_zero_guard
25265 && min_size < (unsigned HOST_WIDE_INT) size_needed
25266 && (count < (unsigned HOST_WIDE_INT) size_needed
25267 || (align_bytes == 0
25268 && count < ((unsigned HOST_WIDE_INT) size_needed
25269 + desired_align - align))))
25271 /* It is possible that we copied enough so the main loop will not
25272 execute. */
25273 gcc_assert (size_needed > 1);
25274 if (label == NULL_RTX)
25275 label = gen_label_rtx ();
25276 emit_cmp_and_jump_insns (count_exp,
25277 GEN_INT (size_needed),
25278 LTU, 0, counter_mode (count_exp), 1, label);
25279 if (expected_size == -1
25280 || expected_size < (desired_align - align) / 2 + size_needed)
25281 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25282 else
25283 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25286 if (label && size_needed == 1)
25288 emit_label (label);
25289 LABEL_NUSES (label) = 1;
25290 label = NULL;
25291 epilogue_size_needed = 1;
25292 if (issetmem)
25293 promoted_val = val_exp;
25295 else if (label == NULL_RTX && !misaligned_prologue_used)
25296 epilogue_size_needed = size_needed;
25298 /* Step 3: Main loop. */
25300 switch (alg)
25302 case libcall:
25303 case no_stringop:
25304 case last_alg:
25305 gcc_unreachable ();
25306 case loop_1_byte:
25307 case loop:
25308 case unrolled_loop:
25309 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25310 count_exp, move_mode, unroll_factor,
25311 expected_size, issetmem);
25312 break;
25313 case vector_loop:
25314 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25315 vec_promoted_val, count_exp, move_mode,
25316 unroll_factor, expected_size, issetmem);
25317 break;
25318 case rep_prefix_8_byte:
25319 case rep_prefix_4_byte:
25320 case rep_prefix_1_byte:
25321 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25322 val_exp, count_exp, move_mode, issetmem);
25323 break;
25325 /* Adjust properly the offset of src and dest memory for aliasing. */
25326 if (CONST_INT_P (count_exp))
25328 if (!issetmem)
25329 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25330 (count / size_needed) * size_needed);
25331 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25332 (count / size_needed) * size_needed);
25334 else
25336 if (!issetmem)
25337 src = change_address (src, BLKmode, srcreg);
25338 dst = change_address (dst, BLKmode, destreg);
25341 /* Step 4: Epilogue to copy the remaining bytes. */
25342 epilogue:
25343 if (label)
25345 /* When the main loop is done, COUNT_EXP might hold original count,
25346 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
25347 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
25348 bytes. Compensate if needed. */
25350 if (size_needed < epilogue_size_needed)
25352 tmp =
25353 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25354 GEN_INT (size_needed - 1), count_exp, 1,
25355 OPTAB_DIRECT);
25356 if (tmp != count_exp)
25357 emit_move_insn (count_exp, tmp);
25359 emit_label (label);
25360 LABEL_NUSES (label) = 1;
25363 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25365 if (force_loopy_epilogue)
25366 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25367 epilogue_size_needed);
25368 else
25370 if (issetmem)
25371 expand_setmem_epilogue (dst, destreg, promoted_val,
25372 vec_promoted_val, count_exp,
25373 epilogue_size_needed);
25374 else
25375 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25376 epilogue_size_needed);
25379 if (jump_around_label)
25380 emit_label (jump_around_label);
25381 return true;
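/* Illustrative sketch (not part of GCC): a C-level picture of the aligned
   move sequence documented above ix86_expand_set_or_movmem -- prologue
   guard for small blocks, alignment prologue, main body copying
   SIZE_NEEDED chunks, and a byte epilogue.  The fixed 8-byte chunk, the
   helper name and the standalone includes are assumptions made only for
   this example; the real expansion emits RTL, not C.  */

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
sketch_inline_memcpy (char *dst, const char *src, size_t count)
{
  const size_t chunk = 8;		/* stands in for SIZE_NEEDED */

  if (count >= chunk)			/* 1) prologue guard */
    {
      /* 2) alignment prologue: peel bytes until DST is CHUNK-aligned.  */
      while ((uintptr_t) dst % chunk)
	{
	  *dst++ = *src++;
	  count--;
	}
      /* 3) main body: one CHUNK-sized move per iteration.  */
      while (count >= chunk)
	{
	  memcpy (dst, src, chunk);
	  dst += chunk;
	  src += chunk;
	  count -= chunk;
	}
    }
  /* 4) epilogue: tail smaller than one chunk.  */
  while (count--)
    *dst++ = *src++;
}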
25385 /* Expand the appropriate insns for doing strlen if not just doing
25386 repnz; scasb
25388 out = result, initialized with the start address
25389 align_rtx = alignment of the address.
25390 scratch = scratch register, initialized with the start address when
25391 not aligned, otherwise undefined
25393 This is just the body. It needs the initializations mentioned above and
25394 some address computing at the end. These things are done in i386.md. */
25396 static void
25397 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25399 int align;
25400 rtx tmp;
25401 rtx_code_label *align_2_label = NULL;
25402 rtx_code_label *align_3_label = NULL;
25403 rtx_code_label *align_4_label = gen_label_rtx ();
25404 rtx_code_label *end_0_label = gen_label_rtx ();
25405 rtx mem;
25406 rtx tmpreg = gen_reg_rtx (SImode);
25407 rtx scratch = gen_reg_rtx (SImode);
25408 rtx cmp;
25410 align = 0;
25411 if (CONST_INT_P (align_rtx))
25412 align = INTVAL (align_rtx);
25414 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25416 /* Is there a known alignment and is it less than 4? */
25417 if (align < 4)
25419 rtx scratch1 = gen_reg_rtx (Pmode);
25420 emit_move_insn (scratch1, out);
25421 /* Is there a known alignment and is it not 2? */
25422 if (align != 2)
25424 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25425 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25427 /* Leave just the 3 lower bits. */
25428 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25429 NULL_RTX, 0, OPTAB_WIDEN);
25431 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25432 Pmode, 1, align_4_label);
25433 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25434 Pmode, 1, align_2_label);
25435 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25436 Pmode, 1, align_3_label);
25438 else
25440 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25441 check if it is aligned to 4 bytes. */
25443 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25444 NULL_RTX, 0, OPTAB_WIDEN);
25446 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25447 Pmode, 1, align_4_label);
25450 mem = change_address (src, QImode, out);
25452 /* Now compare the bytes. */
25454 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25455 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25456 QImode, 1, end_0_label);
25458 /* Increment the address. */
25459 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25461 /* Not needed with an alignment of 2 */
25462 if (align != 2)
25464 emit_label (align_2_label);
25466 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25467 end_0_label);
25469 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25471 emit_label (align_3_label);
25474 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25475 end_0_label);
25477 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25480 /* Generate loop to check 4 bytes at a time. It is not a good idea to
25481 align this loop. It only makes programs larger, but does not help
25482 to speed them up. */
25483 emit_label (align_4_label);
25485 mem = change_address (src, SImode, out);
25486 emit_move_insn (scratch, mem);
25487 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25489 /* This formula yields a nonzero result iff one of the bytes is zero.
25490 This saves three branches inside loop and many cycles. */
25492 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25493 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25494 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25495 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25496 gen_int_mode (0x80808080, SImode)));
25497 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25498 align_4_label);
25500 if (TARGET_CMOVE)
25502 rtx reg = gen_reg_rtx (SImode);
25503 rtx reg2 = gen_reg_rtx (Pmode);
25504 emit_move_insn (reg, tmpreg);
25505 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25507 /* If zero is not in the first two bytes, move two bytes forward. */
25508 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25509 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25510 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25511 emit_insn (gen_rtx_SET (tmpreg,
25512 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25513 reg,
25514 tmpreg)));
25515 /* Emit lea manually to avoid clobbering of flags. */
25516 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
25518 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25519 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25520 emit_insn (gen_rtx_SET (out,
25521 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25522 reg2,
25523 out)));
25525 else
25527 rtx_code_label *end_2_label = gen_label_rtx ();
25528 /* Is zero in the first two bytes? */
25530 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25531 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25532 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25533 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25534 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25535 pc_rtx);
25536 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
25537 JUMP_LABEL (tmp) = end_2_label;
25539 /* Not in the first two. Move two bytes forward. */
25540 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25541 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25543 emit_label (end_2_label);
25547 /* Avoid branch in fixing the byte. */
25548 tmpreg = gen_lowpart (QImode, tmpreg);
25549 emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
25550 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25551 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25552 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25554 emit_label (end_0_label);
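/* Illustrative sketch (not part of GCC): the scalar form of the zero-byte
   test that the unrolled loop above builds from addsi3, one_cmplsi2 and
   andsi3; the helper name is an assumption made only for this example.  */

#include <stdint.h>

static inline uint32_t
sketch_word_has_zero_byte (uint32_t word)
{
  /* The classic "haszero" trick: the result is nonzero iff at least one
     byte of WORD is zero.  */
  return (word - 0x01010101u) & ~word & 0x80808080u;
}

/* For instance, sketch_word_has_zero_byte (0x41410041) is 0x00008000,
   flagging the zero byte in bits 8-15, while 0x41414141 yields 0.  */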
25557 /* Expand strlen. */
25559 bool
25560 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25562 rtx addr, scratch1, scratch2, scratch3, scratch4;
25564 /* The generic case of the strlen expander is long. Avoid expanding
25565 it unless TARGET_INLINE_ALL_STRINGOPS. */
25567 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25568 && !TARGET_INLINE_ALL_STRINGOPS
25569 && !optimize_insn_for_size_p ()
25570 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25571 return false;
25573 addr = force_reg (Pmode, XEXP (src, 0));
25574 scratch1 = gen_reg_rtx (Pmode);
25576 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25577 && !optimize_insn_for_size_p ())
25579 /* Well it seems that some optimizer does not combine a call like
25580 foo(strlen(bar), strlen(bar));
25581 when the move and the subtraction are done here. It does calculate
25582 the length just once when these instructions are done inside of
25583 output_strlen_unroll(). But since &bar[strlen(bar)] is often used
25584 and this uses one fewer register for the lifetime of
25585 output_strlen_unroll(), this is better. */
25587 emit_move_insn (out, addr);
25589 ix86_expand_strlensi_unroll_1 (out, src, align);
25591 /* strlensi_unroll_1 returns the address of the zero at the end of
25592 the string, like memchr(), so compute the length by subtracting
25593 the start address. */
25594 emit_insn (ix86_gen_sub3 (out, out, addr));
25596 else
25598 rtx unspec;
25600 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25601 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25602 return false;
25604 scratch2 = gen_reg_rtx (Pmode);
25605 scratch3 = gen_reg_rtx (Pmode);
25606 scratch4 = force_reg (Pmode, constm1_rtx);
25608 emit_move_insn (scratch3, addr);
25609 eoschar = force_reg (QImode, eoschar);
25611 src = replace_equiv_address_nv (src, scratch3);
25613 /* If .md starts supporting :P, this can be done in .md. */
25614 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25615 scratch4), UNSPEC_SCAS);
25616 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25617 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25618 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25620 return true;
25623 /* For a given symbol (function), construct code to compute the address of
25624 its PLT entry in the large x86-64 PIC model. */
25625 static rtx
25626 construct_plt_address (rtx symbol)
25628 rtx tmp, unspec;
25630 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25631 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25632 gcc_assert (Pmode == DImode);
25634 tmp = gen_reg_rtx (Pmode);
25635 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25637 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25638 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25639 return tmp;
25643 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25644 rtx callarg2,
25645 rtx pop, bool sibcall)
25647 rtx vec[3];
25648 rtx use = NULL, call;
25649 unsigned int vec_len = 0;
25651 if (pop == const0_rtx)
25652 pop = NULL;
25653 gcc_assert (!TARGET_64BIT || !pop);
25655 if (TARGET_MACHO && !TARGET_64BIT)
25657 #if TARGET_MACHO
25658 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25659 fnaddr = machopic_indirect_call_target (fnaddr);
25660 #endif
25662 else
25664 /* Static functions and indirect calls don't need the pic register. Also,
25665 check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
25666 it an indirect call. */
25667 if (flag_pic
25668 && (!TARGET_64BIT
25669 || (ix86_cmodel == CM_LARGE_PIC
25670 && DEFAULT_ABI != MS_ABI))
25671 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25672 && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
25673 && flag_plt
25674 && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
25675 || !lookup_attribute ("noplt",
25676 DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
25678 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25679 if (ix86_use_pseudo_pic_reg ())
25680 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25681 pic_offset_table_rtx);
25685 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25686 parameters passed in vector registers. */
25687 if (TARGET_64BIT
25688 && (INTVAL (callarg2) > 0
25689 || (INTVAL (callarg2) == 0
25690 && (TARGET_SSE || !flag_skip_rax_setup))))
25692 rtx al = gen_rtx_REG (QImode, AX_REG);
25693 emit_move_insn (al, callarg2);
25694 use_reg (&use, al);
25697 if (ix86_cmodel == CM_LARGE_PIC
25698 && !TARGET_PECOFF
25699 && MEM_P (fnaddr)
25700 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25701 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25702 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25703 else if (sibcall
25704 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25705 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25707 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25708 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25711 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25713 if (retval)
25715 /* We should add bounds as destination register in case
25716 pointer with bounds may be returned. */
25717 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25719 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25720 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25721 if (GET_CODE (retval) == PARALLEL)
25723 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25724 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25725 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25726 retval = chkp_join_splitted_slot (retval, par);
25728 else
25730 retval = gen_rtx_PARALLEL (VOIDmode,
25731 gen_rtvec (3, retval, b0, b1));
25732 chkp_put_regs_to_expr_list (retval);
25736 call = gen_rtx_SET (retval, call);
25738 vec[vec_len++] = call;
25740 if (pop)
25742 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25743 pop = gen_rtx_SET (stack_pointer_rtx, pop);
25744 vec[vec_len++] = pop;
25747 if (TARGET_64BIT_MS_ABI
25748 && (!callarg2 || INTVAL (callarg2) != -2))
25750 int const cregs_size
25751 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25752 int i;
25754 for (i = 0; i < cregs_size; i++)
25756 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25757 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25759 clobber_reg (&use, gen_rtx_REG (mode, regno));
25763 if (vec_len > 1)
25764 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25765 call = emit_call_insn (call);
25766 if (use)
25767 CALL_INSN_FUNCTION_USAGE (call) = use;
25769 return call;
25772 /* Return true if the function being called was marked with attribute "noplt"
25773 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
25774 handle the non-PIC case in the backend because there is no easy interface
25775 for the front-end to force non-PLT calls to use the GOT. This is currently
25776 used only with 64-bit ELF targets to call the function marked "noplt"
25777 indirectly. */
25779 static bool
25780 ix86_nopic_noplt_attribute_p (rtx call_op)
25782 if (flag_pic || ix86_cmodel == CM_LARGE
25783 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
25784 || SYMBOL_REF_LOCAL_P (call_op))
25785 return false;
25787 tree symbol_decl = SYMBOL_REF_DECL (call_op);
25789 if (!flag_plt
25790 || (symbol_decl != NULL_TREE
25791 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
25792 return true;
25794 return false;
25797 /* Output the assembly for a call instruction. */
25799 const char *
25800 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25802 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25803 bool seh_nop_p = false;
25804 const char *xasm;
25806 if (SIBLING_CALL_P (insn))
25808 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25809 xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
25810 else if (direct_p)
25811 xasm = "%!jmp\t%P0";
25812 /* SEH epilogue detection requires the indirect branch case
25813 to include REX.W. */
25814 else if (TARGET_SEH)
25815 xasm = "%!rex.W jmp %A0";
25816 else
25817 xasm = "%!jmp\t%A0";
25819 output_asm_insn (xasm, &call_op);
25820 return "";
25823 /* SEH unwinding can require an extra nop to be emitted in several
25824 circumstances. Determine if we have one of those. */
25825 if (TARGET_SEH)
25827 rtx_insn *i;
25829 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25831 /* If we get to another real insn, we don't need the nop. */
25832 if (INSN_P (i))
25833 break;
25835 /* If we get to the epilogue note, prevent a catch region from
25836 being adjacent to the standard epilogue sequence. If non-
25837 call-exceptions, we'll have done this during epilogue emission. */
25838 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25839 && !flag_non_call_exceptions
25840 && !can_throw_internal (insn))
25842 seh_nop_p = true;
25843 break;
25847 /* If we didn't find a real insn following the call, prevent the
25848 unwinder from looking into the next function. */
25849 if (i == NULL)
25850 seh_nop_p = true;
25853 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25854 xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
25855 else if (direct_p)
25856 xasm = "%!call\t%P0";
25857 else
25858 xasm = "%!call\t%A0";
25860 output_asm_insn (xasm, &call_op);
25862 if (seh_nop_p)
25863 return "nop";
25865 return "";
25868 /* Clear stack slot assignments remembered from previous functions.
25869 This is called from INIT_EXPANDERS once before RTL is emitted for each
25870 function. */
25872 static struct machine_function *
25873 ix86_init_machine_status (void)
25875 struct machine_function *f;
25877 f = ggc_cleared_alloc<machine_function> ();
25878 f->use_fast_prologue_epilogue_nregs = -1;
25879 f->call_abi = ix86_abi;
25881 return f;
25884 /* Return a MEM corresponding to a stack slot with mode MODE.
25885 Allocate a new slot if necessary.
25887 The RTL for a function can have several slots available: N is
25888 which slot to use. */
25891 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25893 struct stack_local_entry *s;
25895 gcc_assert (n < MAX_386_STACK_LOCALS);
25897 for (s = ix86_stack_locals; s; s = s->next)
25898 if (s->mode == mode && s->n == n)
25899 return validize_mem (copy_rtx (s->rtl));
25901 s = ggc_alloc<stack_local_entry> ();
25902 s->n = n;
25903 s->mode = mode;
25904 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25906 s->next = ix86_stack_locals;
25907 ix86_stack_locals = s;
25908 return validize_mem (copy_rtx (s->rtl));
25911 static void
25912 ix86_instantiate_decls (void)
25914 struct stack_local_entry *s;
25916 for (s = ix86_stack_locals; s; s = s->next)
25917 if (s->rtl != NULL_RTX)
25918 instantiate_decl_rtl (s->rtl);
25921 /* Check whether x86 address PARTS is a pc-relative address. */
25923 static bool
25924 rip_relative_addr_p (struct ix86_address *parts)
25926 rtx base, index, disp;
25928 base = parts->base;
25929 index = parts->index;
25930 disp = parts->disp;
25932 if (disp && !base && !index)
25934 if (TARGET_64BIT)
25936 rtx symbol = disp;
25938 if (GET_CODE (disp) == CONST)
25939 symbol = XEXP (disp, 0);
25940 if (GET_CODE (symbol) == PLUS
25941 && CONST_INT_P (XEXP (symbol, 1)))
25942 symbol = XEXP (symbol, 0);
25944 if (GET_CODE (symbol) == LABEL_REF
25945 || (GET_CODE (symbol) == SYMBOL_REF
25946 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25947 || (GET_CODE (symbol) == UNSPEC
25948 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25949 || XINT (symbol, 1) == UNSPEC_PCREL
25950 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25951 return true;
25954 return false;
25957 /* Calculate the length of the memory address in the instruction encoding.
25958 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25959 or other prefixes. We never generate addr32 prefix for LEA insn. */
25962 memory_address_length (rtx addr, bool lea)
25964 struct ix86_address parts;
25965 rtx base, index, disp;
25966 int len;
25967 int ok;
25969 if (GET_CODE (addr) == PRE_DEC
25970 || GET_CODE (addr) == POST_INC
25971 || GET_CODE (addr) == PRE_MODIFY
25972 || GET_CODE (addr) == POST_MODIFY)
25973 return 0;
25975 ok = ix86_decompose_address (addr, &parts);
25976 gcc_assert (ok);
25978 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25980 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25981 if (TARGET_64BIT && !lea
25982 && (SImode_address_operand (addr, VOIDmode)
25983 || (parts.base && GET_MODE (parts.base) == SImode)
25984 || (parts.index && GET_MODE (parts.index) == SImode)))
25985 len++;
25987 base = parts.base;
25988 index = parts.index;
25989 disp = parts.disp;
25991 if (base && SUBREG_P (base))
25992 base = SUBREG_REG (base);
25993 if (index && SUBREG_P (index))
25994 index = SUBREG_REG (index);
25996 gcc_assert (base == NULL_RTX || REG_P (base));
25997 gcc_assert (index == NULL_RTX || REG_P (index));
25999 /* Rule of thumb:
26000 - esp as the base always wants an index,
26001 - ebp as the base always wants a displacement,
26002 - r12 as the base always wants an index,
26003 - r13 as the base always wants a displacement. */
26005 /* Register Indirect. */
26006 if (base && !index && !disp)
26008 /* esp (for its index) and ebp (for its displacement) need
26009 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
26010 code. */
26011 if (base == arg_pointer_rtx
26012 || base == frame_pointer_rtx
26013 || REGNO (base) == SP_REG
26014 || REGNO (base) == BP_REG
26015 || REGNO (base) == R12_REG
26016 || REGNO (base) == R13_REG)
26017 len++;
26020 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
26021 is not disp32, but disp32(%rip), so for disp32
26022 SIB byte is needed, unless print_operand_address
26023 optimizes it into disp32(%rip) or (%rip) is implied
26024 by UNSPEC. */
26025 else if (disp && !base && !index)
26027 len += 4;
26028 if (rip_relative_addr_p (&parts))
26029 len++;
26031 else
26033 /* Find the length of the displacement constant. */
26034 if (disp)
26036 if (base && satisfies_constraint_K (disp))
26037 len += 1;
26038 else
26039 len += 4;
26041 /* ebp always wants a displacement. Similarly r13. */
26042 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
26043 len++;
26045 /* An index requires the two-byte modrm form.... */
26046 if (index
26047 /* ...like esp (or r12), which always wants an index. */
26048 || base == arg_pointer_rtx
26049 || base == frame_pointer_rtx
26050 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
26051 len++;
26054 return len;
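/* Worked examples (illustrative, not part of the original sources): under
   the rules above, (%rax) contributes 0 bytes, (%rsp) contributes 1 (SIB
   byte), 8(%rbp) contributes 1 (disp8), 8(%rbp,%rcx,4) contributes 2
   (disp8 + SIB) and 1024(%rax) contributes 4 (disp32); the one-byte modrm
   and the opcode are never counted here.  */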
26057 /* Compute default value for "length_immediate" attribute. When SHORTFORM
26058 is set, expect that the insn has an 8-bit immediate alternative. */
26060 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
26062 int len = 0;
26063 int i;
26064 extract_insn_cached (insn);
26065 for (i = recog_data.n_operands - 1; i >= 0; --i)
26066 if (CONSTANT_P (recog_data.operand[i]))
26068 enum attr_mode mode = get_attr_mode (insn);
26070 gcc_assert (!len);
26071 if (shortform && CONST_INT_P (recog_data.operand[i]))
26073 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
26074 switch (mode)
26076 case MODE_QI:
26077 len = 1;
26078 continue;
26079 case MODE_HI:
26080 ival = trunc_int_for_mode (ival, HImode);
26081 break;
26082 case MODE_SI:
26083 ival = trunc_int_for_mode (ival, SImode);
26084 break;
26085 default:
26086 break;
26088 if (IN_RANGE (ival, -128, 127))
26090 len = 1;
26091 continue;
26094 switch (mode)
26096 case MODE_QI:
26097 len = 1;
26098 break;
26099 case MODE_HI:
26100 len = 2;
26101 break;
26102 case MODE_SI:
26103 len = 4;
26104 break;
26105 /* Immediates for DImode instructions are encoded
26106 as 32-bit sign-extended values. */
26107 case MODE_DI:
26108 len = 4;
26109 break;
26110 default:
26111 fatal_insn ("unknown insn mode", insn);
26114 return len;
26117 /* Compute default value for "length_address" attribute. */
26119 ix86_attr_length_address_default (rtx_insn *insn)
26121 int i;
26123 if (get_attr_type (insn) == TYPE_LEA)
26125 rtx set = PATTERN (insn), addr;
26127 if (GET_CODE (set) == PARALLEL)
26128 set = XVECEXP (set, 0, 0);
26130 gcc_assert (GET_CODE (set) == SET);
26132 addr = SET_SRC (set);
26134 return memory_address_length (addr, true);
26137 extract_insn_cached (insn);
26138 for (i = recog_data.n_operands - 1; i >= 0; --i)
26139 if (MEM_P (recog_data.operand[i]))
26141 constrain_operands_cached (insn, reload_completed);
26142 if (which_alternative != -1)
26144 const char *constraints = recog_data.constraints[i];
26145 int alt = which_alternative;
26147 while (*constraints == '=' || *constraints == '+')
26148 constraints++;
26149 while (alt-- > 0)
26150 while (*constraints++ != ',')
26152 /* Skip ignored operands. */
26153 if (*constraints == 'X')
26154 continue;
26156 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26158 return 0;
26161 /* Compute default value for "length_vex" attribute. It includes
26162 2 or 3 byte VEX prefix and 1 opcode byte. */
26165 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26166 bool has_vex_w)
26168 int i;
26170 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W bit
26171 requires the 3-byte VEX prefix. */
26172 if (!has_0f_opcode || has_vex_w)
26173 return 3 + 1;
26175 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
26176 if (!TARGET_64BIT)
26177 return 2 + 1;
26179 extract_insn_cached (insn);
26181 for (i = recog_data.n_operands - 1; i >= 0; --i)
26182 if (REG_P (recog_data.operand[i]))
26184 /* REX.W bit uses 3 byte VEX prefix. */
26185 if (GET_MODE (recog_data.operand[i]) == DImode
26186 && GENERAL_REG_P (recog_data.operand[i]))
26187 return 3 + 1;
26189 else
26191 /* REX.X or REX.B bits use 3 byte VEX prefix. */
26192 if (MEM_P (recog_data.operand[i])
26193 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26194 return 3 + 1;
26197 return 2 + 1;
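/* Illustrative examples (encodings assumed, not taken from the sources):
   vaddps %xmm1, %xmm2, %xmm3 fits the 2-byte (C5) VEX prefix, giving
   2 + 1, while an operand needing REX.W, REX.X or REX.B -- e.g. a memory
   operand based on %r8 -- or an opcode outside the 0f map forces the
   3-byte (C4) form and a value of 3 + 1.  */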
26200 /* Return the maximum number of instructions a cpu can issue. */
26202 static int
26203 ix86_issue_rate (void)
26205 switch (ix86_tune)
26207 case PROCESSOR_PENTIUM:
26208 case PROCESSOR_IAMCU:
26209 case PROCESSOR_BONNELL:
26210 case PROCESSOR_SILVERMONT:
26211 case PROCESSOR_KNL:
26212 case PROCESSOR_INTEL:
26213 case PROCESSOR_K6:
26214 case PROCESSOR_BTVER2:
26215 case PROCESSOR_PENTIUM4:
26216 case PROCESSOR_NOCONA:
26217 return 2;
26219 case PROCESSOR_PENTIUMPRO:
26220 case PROCESSOR_ATHLON:
26221 case PROCESSOR_K8:
26222 case PROCESSOR_AMDFAM10:
26223 case PROCESSOR_GENERIC:
26224 case PROCESSOR_BTVER1:
26225 return 3;
26227 case PROCESSOR_BDVER1:
26228 case PROCESSOR_BDVER2:
26229 case PROCESSOR_BDVER3:
26230 case PROCESSOR_BDVER4:
26231 case PROCESSOR_CORE2:
26232 case PROCESSOR_NEHALEM:
26233 case PROCESSOR_SANDYBRIDGE:
26234 case PROCESSOR_HASWELL:
26235 return 4;
26237 default:
26238 return 1;
26242 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
26243 by DEP_INSN and nothing else set by DEP_INSN. */
26245 static bool
26246 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26248 rtx set, set2;
26250 /* Simplify the test for uninteresting insns. */
26251 if (insn_type != TYPE_SETCC
26252 && insn_type != TYPE_ICMOV
26253 && insn_type != TYPE_FCMOV
26254 && insn_type != TYPE_IBR)
26255 return false;
26257 if ((set = single_set (dep_insn)) != 0)
26259 set = SET_DEST (set);
26260 set2 = NULL_RTX;
26262 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26263 && XVECLEN (PATTERN (dep_insn), 0) == 2
26264 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26265 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26267 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26268 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26270 else
26271 return false;
26273 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26274 return false;
26276 /* This test is true if the dependent insn reads the flags but
26277 not any other potentially set register. */
26278 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26279 return false;
26281 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26282 return false;
26284 return true;
26287 /* Return true iff USE_INSN has a memory address with operands set by
26288 SET_INSN. */
26290 bool
26291 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26293 int i;
26294 extract_insn_cached (use_insn);
26295 for (i = recog_data.n_operands - 1; i >= 0; --i)
26296 if (MEM_P (recog_data.operand[i]))
26298 rtx addr = XEXP (recog_data.operand[i], 0);
26299 return modified_in_p (addr, set_insn) != 0;
26301 return false;
26304 /* Helper function for exact_store_load_dependency.
26305 Return true if addr is found in insn. */
26306 static bool
26307 exact_dependency_1 (rtx addr, rtx insn)
26309 enum rtx_code code;
26310 const char *format_ptr;
26311 int i, j;
26313 code = GET_CODE (insn);
26314 switch (code)
26316 case MEM:
26317 if (rtx_equal_p (addr, insn))
26318 return true;
26319 break;
26320 case REG:
26321 CASE_CONST_ANY:
26322 case SYMBOL_REF:
26323 case CODE_LABEL:
26324 case PC:
26325 case CC0:
26326 case EXPR_LIST:
26327 return false;
26328 default:
26329 break;
26332 format_ptr = GET_RTX_FORMAT (code);
26333 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26335 switch (*format_ptr++)
26337 case 'e':
26338 if (exact_dependency_1 (addr, XEXP (insn, i)))
26339 return true;
26340 break;
26341 case 'E':
26342 for (j = 0; j < XVECLEN (insn, i); j++)
26343 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26344 return true;
26345 break;
26348 return false;
26351 /* Return true if there exists exact dependency for store & load, i.e.
26352 the same memory address is used in them. */
26353 static bool
26354 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26356 rtx set1, set2;
26358 set1 = single_set (store);
26359 if (!set1)
26360 return false;
26361 if (!MEM_P (SET_DEST (set1)))
26362 return false;
26363 set2 = single_set (load);
26364 if (!set2)
26365 return false;
26366 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26367 return true;
26368 return false;
26371 static int
26372 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26374 enum attr_type insn_type, dep_insn_type;
26375 enum attr_memory memory;
26376 rtx set, set2;
26377 int dep_insn_code_number;
26379 /* Anti and output dependencies have zero cost on all CPUs. */
26380 if (REG_NOTE_KIND (link) != 0)
26381 return 0;
26383 dep_insn_code_number = recog_memoized (dep_insn);
26385 /* If we can't recognize the insns, we can't really do anything. */
26386 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26387 return cost;
26389 insn_type = get_attr_type (insn);
26390 dep_insn_type = get_attr_type (dep_insn);
26392 switch (ix86_tune)
26394 case PROCESSOR_PENTIUM:
26395 case PROCESSOR_IAMCU:
26396 /* Address Generation Interlock adds a cycle of latency. */
26397 if (insn_type == TYPE_LEA)
26399 rtx addr = PATTERN (insn);
26401 if (GET_CODE (addr) == PARALLEL)
26402 addr = XVECEXP (addr, 0, 0);
26404 gcc_assert (GET_CODE (addr) == SET);
26406 addr = SET_SRC (addr);
26407 if (modified_in_p (addr, dep_insn))
26408 cost += 1;
26410 else if (ix86_agi_dependent (dep_insn, insn))
26411 cost += 1;
26413 /* ??? Compares pair with jump/setcc. */
26414 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26415 cost = 0;
26417 /* Floating point stores require value to be ready one cycle earlier. */
26418 if (insn_type == TYPE_FMOV
26419 && get_attr_memory (insn) == MEMORY_STORE
26420 && !ix86_agi_dependent (dep_insn, insn))
26421 cost += 1;
26422 break;
26424 case PROCESSOR_PENTIUMPRO:
26425 /* INT->FP conversion is expensive. */
26426 if (get_attr_fp_int_src (dep_insn))
26427 cost += 5;
26429 /* There is one cycle extra latency between an FP op and a store. */
26430 if (insn_type == TYPE_FMOV
26431 && (set = single_set (dep_insn)) != NULL_RTX
26432 && (set2 = single_set (insn)) != NULL_RTX
26433 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26434 && MEM_P (SET_DEST (set2)))
26435 cost += 1;
26437 memory = get_attr_memory (insn);
26439 /* Show the ability of the reorder buffer to hide the latency of a load
26440 by executing it in parallel with the previous instruction when the
26441 previous instruction is not needed to compute the address. */
26442 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26443 && !ix86_agi_dependent (dep_insn, insn))
26445 /* Claim moves take one cycle, as the core can issue one load
26446 at a time and the next load can start a cycle later. */
26447 if (dep_insn_type == TYPE_IMOV
26448 || dep_insn_type == TYPE_FMOV)
26449 cost = 1;
26450 else if (cost > 1)
26451 cost--;
26453 break;
26455 case PROCESSOR_K6:
26456 /* The esp dependency is resolved before
26457 the instruction is really finished. */
26458 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26459 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26460 return 1;
26462 /* INT->FP conversion is expensive. */
26463 if (get_attr_fp_int_src (dep_insn))
26464 cost += 5;
26466 memory = get_attr_memory (insn);
26468 /* Show the ability of the reorder buffer to hide the latency of a load
26469 by executing it in parallel with the previous instruction when the
26470 previous instruction is not needed to compute the address. */
26471 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26472 && !ix86_agi_dependent (dep_insn, insn))
26474 /* Claim moves take one cycle, as the core can issue one load
26475 at a time and the next load can start a cycle later. */
26476 if (dep_insn_type == TYPE_IMOV
26477 || dep_insn_type == TYPE_FMOV)
26478 cost = 1;
26479 else if (cost > 2)
26480 cost -= 2;
26481 else
26482 cost = 1;
26484 break;
26486 case PROCESSOR_AMDFAM10:
26487 case PROCESSOR_BDVER1:
26488 case PROCESSOR_BDVER2:
26489 case PROCESSOR_BDVER3:
26490 case PROCESSOR_BDVER4:
26491 case PROCESSOR_BTVER1:
26492 case PROCESSOR_BTVER2:
26493 case PROCESSOR_GENERIC:
26494 /* Stack engine allows push & pop instructions to execute in parallel. */
26495 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26496 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26497 return 0;
26498 /* FALLTHRU */
26500 case PROCESSOR_ATHLON:
26501 case PROCESSOR_K8:
26502 memory = get_attr_memory (insn);
26504 /* Show the ability of the reorder buffer to hide the latency of a load
26505 by executing it in parallel with the previous instruction when the
26506 previous instruction is not needed to compute the address. */
26507 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26508 && !ix86_agi_dependent (dep_insn, insn))
26510 enum attr_unit unit = get_attr_unit (insn);
26511 int loadcost = 3;
26513 /* Because of the difference between the lengths of the integer and
26514 floating unit pipeline preparation stages, the memory operands
26515 for floating point are cheaper.
26517 ??? For Athlon the difference is most probably 2. */
26518 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26519 loadcost = 3;
26520 else
26521 loadcost = TARGET_ATHLON ? 2 : 0;
26523 if (cost >= loadcost)
26524 cost -= loadcost;
26525 else
26526 cost = 0;
26528 break;
26530 case PROCESSOR_CORE2:
26531 case PROCESSOR_NEHALEM:
26532 case PROCESSOR_SANDYBRIDGE:
26533 case PROCESSOR_HASWELL:
26534 /* Stack engine allows push & pop instructions to execute in parallel. */
26535 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26536 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26537 return 0;
26539 memory = get_attr_memory (insn);
26541 /* Show the ability of the reorder buffer to hide the latency of a load
26542 by executing it in parallel with the previous instruction when the
26543 previous instruction is not needed to compute the address. */
26544 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26545 && !ix86_agi_dependent (dep_insn, insn))
26547 if (cost >= 4)
26548 cost -= 4;
26549 else
26550 cost = 0;
26552 break;
26554 case PROCESSOR_SILVERMONT:
26555 case PROCESSOR_KNL:
26556 case PROCESSOR_INTEL:
26557 if (!reload_completed)
26558 return cost;
26560 /* Increase cost of integer loads. */
26561 memory = get_attr_memory (dep_insn);
26562 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26564 enum attr_unit unit = get_attr_unit (dep_insn);
26565 if (unit == UNIT_INTEGER && cost == 1)
26567 if (memory == MEMORY_LOAD)
26568 cost = 3;
26569 else
26571 /* Increase cost of ld/st for short int types only
26572 because of the store-forwarding issue. */
26573 rtx set = single_set (dep_insn);
26574 if (set && (GET_MODE (SET_DEST (set)) == QImode
26575 || GET_MODE (SET_DEST (set)) == HImode))
26577 /* Increase the cost of the store/load insn if an exact
26578 dependence exists and INSN is a load insn. */
26579 enum attr_memory insn_memory = get_attr_memory (insn);
26580 if (insn_memory == MEMORY_LOAD
26581 && exact_store_load_dependency (dep_insn, insn))
26582 cost = 3;
26588 default:
26589 break;
26592 return cost;
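/* A rough worked example of the Core-family branch above (assumed numbers,
   for illustration only): if INSN is a memory-using insn whose dependence on
   DEP_INSN does not go through the address (so ix86_agi_dependent is false)
   and the DFA reports a latency of 6, the code subtracts 4, leaving 2 -
   modelling the out-of-order core's ability to hide most of that latency
   behind the load.  */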
26595 /* How many alternative schedules to try. This should be as wide as the
26596 scheduling freedom in the DFA, but no wider. Making this value too
26597 large results in extra work for the scheduler. */
26599 static int
26600 ia32_multipass_dfa_lookahead (void)
26602 switch (ix86_tune)
26604 case PROCESSOR_PENTIUM:
26605 case PROCESSOR_IAMCU:
26606 return 2;
26608 case PROCESSOR_PENTIUMPRO:
26609 case PROCESSOR_K6:
26610 return 1;
26612 case PROCESSOR_BDVER1:
26613 case PROCESSOR_BDVER2:
26614 case PROCESSOR_BDVER3:
26615 case PROCESSOR_BDVER4:
26616 /* We use lookahead value 4 for BD both before and after reload
26617 schedules. The plan is to include value 8 for -O3. */
26618 return 4;
26620 case PROCESSOR_CORE2:
26621 case PROCESSOR_NEHALEM:
26622 case PROCESSOR_SANDYBRIDGE:
26623 case PROCESSOR_HASWELL:
26624 case PROCESSOR_BONNELL:
26625 case PROCESSOR_SILVERMONT:
26626 case PROCESSOR_KNL:
26627 case PROCESSOR_INTEL:
26628 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26629 as the number of instructions that can be executed in a cycle, i.e.,
26630 issue_rate. I wonder why tuning for many CPUs does not do this. */
26631 if (reload_completed)
26632 return ix86_issue_rate ();
26633 /* Don't use lookahead for pre-reload schedule to save compile time. */
26634 return 0;
26636 default:
26637 return 0;
26641 /* Return true if target platform supports macro-fusion. */
26643 static bool
26644 ix86_macro_fusion_p ()
26646 return TARGET_FUSE_CMP_AND_BRANCH;
26649 /* Check whether the current microarchitecture supports macro fusion
26650 for insn pair "CONDGEN + CONDJMP". Refer to
26651 "Intel Architectures Optimization Reference Manual". */
26653 static bool
26654 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26656 rtx src, dest;
26657 enum rtx_code ccode;
26658 rtx compare_set = NULL_RTX, test_if, cond;
26659 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26661 if (!any_condjump_p (condjmp))
26662 return false;
26664 if (get_attr_type (condgen) != TYPE_TEST
26665 && get_attr_type (condgen) != TYPE_ICMP
26666 && get_attr_type (condgen) != TYPE_INCDEC
26667 && get_attr_type (condgen) != TYPE_ALU)
26668 return false;
26670 compare_set = single_set (condgen);
26671 if (compare_set == NULL_RTX
26672 && !TARGET_FUSE_ALU_AND_BRANCH)
26673 return false;
26675 if (compare_set == NULL_RTX)
26677 int i;
26678 rtx pat = PATTERN (condgen);
26679 for (i = 0; i < XVECLEN (pat, 0); i++)
26680 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26682 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26683 if (GET_CODE (set_src) == COMPARE)
26684 compare_set = XVECEXP (pat, 0, i);
26685 else
26686 alu_set = XVECEXP (pat, 0, i);
26689 if (compare_set == NULL_RTX)
26690 return false;
26691 src = SET_SRC (compare_set);
26692 if (GET_CODE (src) != COMPARE)
26693 return false;
26695 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26696 supported. */
26697 if ((MEM_P (XEXP (src, 0))
26698 && CONST_INT_P (XEXP (src, 1)))
26699 || (MEM_P (XEXP (src, 1))
26700 && CONST_INT_P (XEXP (src, 0))))
26701 return false;
26703 /* No fusion for RIP-relative address. */
26704 if (MEM_P (XEXP (src, 0)))
26705 addr = XEXP (XEXP (src, 0), 0);
26706 else if (MEM_P (XEXP (src, 1)))
26707 addr = XEXP (XEXP (src, 1), 0);
26709 if (addr) {
26710 ix86_address parts;
26711 int ok = ix86_decompose_address (addr, &parts);
26712 gcc_assert (ok);
26714 if (rip_relative_addr_p (&parts))
26715 return false;
26718 test_if = SET_SRC (pc_set (condjmp));
26719 cond = XEXP (test_if, 0);
26720 ccode = GET_CODE (cond);
26721 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26722 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26723 && (ccode == GE
26724 || ccode == GT
26725 || ccode == LE
26726 || ccode == LT))
26727 return false;
26729 /* Return true for TYPE_TEST and TYPE_ICMP. */
26730 if (get_attr_type (condgen) == TYPE_TEST
26731 || get_attr_type (condgen) == TYPE_ICMP)
26732 return true;
26734 /* The following handles the case of macro-fusion for alu + jmp. */
26735 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26736 return false;
26738 /* No fusion for alu op with memory destination operand. */
26739 dest = SET_DEST (alu_set);
26740 if (MEM_P (dest))
26741 return false;
26743 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26744 supported. */
26745 if (get_attr_type (condgen) == TYPE_INCDEC
26746 && (ccode == GEU
26747 || ccode == GTU
26748 || ccode == LEU
26749 || ccode == LTU))
26750 return false;
26752 return true;
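/* For illustration only (hypothetical insn pairs, judged purely by the
   checks above):

       cmpl  $1, %eax   ; jne .L2     - reg/imm compare + jcc: fusible
       cmpl  $1, (%rdi) ; jne .L2     - mem/imm compare: rejected above
       decl  %eax       ; jb  .L2     - inc/dec + unsigned jcc: rejected

   Whether fusion is considered at all is still gated by the
   TARGET_FUSE_CMP_AND_BRANCH* / TARGET_FUSE_ALU_AND_BRANCH tuning flags.  */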
26755 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
26756 execution. It is applied if
26757 (1) an IMUL instruction is at the top of the ready list;
26758 (2) there is exactly one producer of an independent IMUL instruction
26759 in the ready list.
26760 Return index of IMUL producer if it was found and -1 otherwise. */
26761 static int
26762 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26764 rtx_insn *insn;
26765 rtx set, insn1, insn2;
26766 sd_iterator_def sd_it;
26767 dep_t dep;
26768 int index = -1;
26769 int i;
26771 if (!TARGET_BONNELL)
26772 return index;
26774 /* Check that IMUL instruction is on the top of ready list. */
26775 insn = ready[n_ready - 1];
26776 set = single_set (insn);
26777 if (!set)
26778 return index;
26779 if (!(GET_CODE (SET_SRC (set)) == MULT
26780 && GET_MODE (SET_SRC (set)) == SImode))
26781 return index;
26783 /* Search for producer of independent IMUL instruction. */
26784 for (i = n_ready - 2; i >= 0; i--)
26786 insn = ready[i];
26787 if (!NONDEBUG_INSN_P (insn))
26788 continue;
26789 /* Skip IMUL instruction. */
26790 insn2 = PATTERN (insn);
26791 if (GET_CODE (insn2) == PARALLEL)
26792 insn2 = XVECEXP (insn2, 0, 0);
26793 if (GET_CODE (insn2) == SET
26794 && GET_CODE (SET_SRC (insn2)) == MULT
26795 && GET_MODE (SET_SRC (insn2)) == SImode)
26796 continue;
26798 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26800 rtx con;
26801 con = DEP_CON (dep);
26802 if (!NONDEBUG_INSN_P (con))
26803 continue;
26804 insn1 = PATTERN (con);
26805 if (GET_CODE (insn1) == PARALLEL)
26806 insn1 = XVECEXP (insn1, 0, 0);
26808 if (GET_CODE (insn1) == SET
26809 && GET_CODE (SET_SRC (insn1)) == MULT
26810 && GET_MODE (SET_SRC (insn1)) == SImode)
26812 sd_iterator_def sd_it1;
26813 dep_t dep1;
26814 /* Check that the IMUL consumer has no producer other than INSN. */
26815 index = i;
26816 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26818 rtx pro;
26819 pro = DEP_PRO (dep1);
26820 if (!NONDEBUG_INSN_P (pro))
26821 continue;
26822 if (pro != insn)
26823 index = -1;
26825 if (index >= 0)
26826 break;
26829 if (index >= 0)
26830 break;
26832 return index;
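/* An illustrative sketch (assumed scenario) of what this reordering buys on
   Bonnell: if the ready list looks like

       ... , producer-of-B, ... , imull A        <- top of list

   where "producer-of-B" feeds an independent SImode imull B that is not yet
   ready, moving the producer to the top lets the pipelined IMUL unit start
   B right behind A instead of sitting idle.  */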
26835 /* Try to find the best candidate at the top of the ready list if two insns
26836 have the same priority - a candidate is best if the insns it depends on
26837 were scheduled earlier. Applied for Silvermont only.
26838 Return true if the top 2 insns must be interchanged. */
26839 static bool
26840 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26842 rtx_insn *top = ready[n_ready - 1];
26843 rtx_insn *next = ready[n_ready - 2];
26844 rtx set;
26845 sd_iterator_def sd_it;
26846 dep_t dep;
26847 int clock1 = -1;
26848 int clock2 = -1;
26849 #define INSN_TICK(INSN) (HID (INSN)->tick)
26851 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26852 return false;
26854 if (!NONDEBUG_INSN_P (top))
26855 return false;
26856 if (!NONJUMP_INSN_P (top))
26857 return false;
26858 if (!NONDEBUG_INSN_P (next))
26859 return false;
26860 if (!NONJUMP_INSN_P (next))
26861 return false;
26862 set = single_set (top);
26863 if (!set)
26864 return false;
26865 set = single_set (next);
26866 if (!set)
26867 return false;
26869 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26871 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26872 return false;
26873 /* Determine the winner more precisely. */
26874 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26876 rtx pro;
26877 pro = DEP_PRO (dep);
26878 if (!NONDEBUG_INSN_P (pro))
26879 continue;
26880 if (INSN_TICK (pro) > clock1)
26881 clock1 = INSN_TICK (pro);
26883 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26885 rtx pro;
26886 pro = DEP_PRO (dep);
26887 if (!NONDEBUG_INSN_P (pro))
26888 continue;
26889 if (INSN_TICK (pro) > clock2)
26890 clock2 = INSN_TICK (pro);
26893 if (clock1 == clock2)
26895 /* Determine winner - load must win. */
26896 enum attr_memory memory1, memory2;
26897 memory1 = get_attr_memory (top);
26898 memory2 = get_attr_memory (next);
26899 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26900 return true;
26902 return (bool) (clock2 < clock1);
26904 return false;
26905 #undef INSN_TICK
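/* A small worked example (assumed tick values, for illustration): with equal
   priorities, if TOP's latest producer finished at tick 7 while NEXT's
   finished at tick 5, then clock2 < clock1 and the two are swapped so NEXT
   issues first; on a tie, a load is preferred over a non-load.  */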
26908 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26909 Return issue rate. */
26910 static int
26911 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26912 int *pn_ready, int clock_var)
26914 int issue_rate = -1;
26915 int n_ready = *pn_ready;
26916 int i;
26917 rtx_insn *insn;
26918 int index = -1;
26920 /* Set up issue rate. */
26921 issue_rate = ix86_issue_rate ();
26923 /* Do reordering for BONNELL/SILVERMONT only. */
26924 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26925 return issue_rate;
26927 /* Nothing to do if ready list contains only 1 instruction. */
26928 if (n_ready <= 1)
26929 return issue_rate;
26931 /* Do reordering for the post-reload scheduler only. */
26932 if (!reload_completed)
26933 return issue_rate;
26935 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26937 if (sched_verbose > 1)
26938 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26939 INSN_UID (ready[index]));
26941 /* Put IMUL producer (ready[index]) at the top of ready list. */
26942 insn = ready[index];
26943 for (i = index; i < n_ready - 1; i++)
26944 ready[i] = ready[i + 1];
26945 ready[n_ready - 1] = insn;
26946 return issue_rate;
26949 /* Skip selective scheduling since HID is not populated in it. */
26950 if (clock_var != 0
26951 && !sel_sched_p ()
26952 && swap_top_of_ready_list (ready, n_ready))
26954 if (sched_verbose > 1)
26955 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26956 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26957 /* Swap 2 top elements of ready list. */
26958 insn = ready[n_ready - 1];
26959 ready[n_ready - 1] = ready[n_ready - 2];
26960 ready[n_ready - 2] = insn;
26962 return issue_rate;
26965 static bool
26966 ix86_class_likely_spilled_p (reg_class_t);
26968 /* Return true if the lhs of INSN is a HW function argument register; set
26969 IS_SPILLED to true if it is a likely-spilled HW register. */
26970 static bool
26971 insn_is_function_arg (rtx insn, bool* is_spilled)
26973 rtx dst;
26975 if (!NONDEBUG_INSN_P (insn))
26976 return false;
26977 /* Call instructions are not movable, ignore them. */
26978 if (CALL_P (insn))
26979 return false;
26980 insn = PATTERN (insn);
26981 if (GET_CODE (insn) == PARALLEL)
26982 insn = XVECEXP (insn, 0, 0);
26983 if (GET_CODE (insn) != SET)
26984 return false;
26985 dst = SET_DEST (insn);
26986 if (REG_P (dst) && HARD_REGISTER_P (dst)
26987 && ix86_function_arg_regno_p (REGNO (dst)))
26989 /* Is it likely spilled HW register? */
26990 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26991 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26992 *is_spilled = true;
26993 return true;
26995 return false;
26998 /* Add output dependencies for a chain of adjacent function arguments, but
26999 only if there is a move to a likely-spilled HW register. Return the first
27000 argument if at least one dependence was added, or NULL otherwise. */
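/* For illustration only (an assumed pre-reload sequence for a 32-bit
   regparm call): two back-to-back sets of hard argument registers,

       (set (reg:SI ax) (reg:SI 90))
       (set (reg:SI dx) (reg:SI 91))
       (call ...)

   both target single-register, likely-spilled classes, so an output
   dependence is added between them; this keeps the scheduler from pulling
   unrelated work in between and stretching the hard registers' lives.
   The pseudo numbers 90/91 are purely illustrative.  */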
27001 static rtx_insn *
27002 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
27004 rtx_insn *insn;
27005 rtx_insn *last = call;
27006 rtx_insn *first_arg = NULL;
27007 bool is_spilled = false;
27009 head = PREV_INSN (head);
27011 /* Find the argument-passing instruction nearest to the call. */
27012 while (true)
27014 last = PREV_INSN (last);
27015 if (last == head)
27016 return NULL;
27017 if (!NONDEBUG_INSN_P (last))
27018 continue;
27019 if (insn_is_function_arg (last, &is_spilled))
27020 break;
27021 return NULL;
27024 first_arg = last;
27025 while (true)
27027 insn = PREV_INSN (last);
27028 if (!INSN_P (insn))
27029 break;
27030 if (insn == head)
27031 break;
27032 if (!NONDEBUG_INSN_P (insn))
27034 last = insn;
27035 continue;
27037 if (insn_is_function_arg (insn, &is_spilled))
27039 /* Add an output dependence between two function arguments if the chain
27040 of output arguments contains likely-spilled HW registers. */
27041 if (is_spilled)
27042 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27043 first_arg = last = insn;
27045 else
27046 break;
27048 if (!is_spilled)
27049 return NULL;
27050 return first_arg;
27053 /* Add output or anti dependency from insn to first_arg to restrict its code
27054 motion. */
27055 static void
27056 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
27058 rtx set;
27059 rtx tmp;
27061 /* Add anti dependencies for bounds stores. */
27062 if (INSN_P (insn)
27063 && GET_CODE (PATTERN (insn)) == PARALLEL
27064 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
27065 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
27067 add_dependence (first_arg, insn, REG_DEP_ANTI);
27068 return;
27071 set = single_set (insn);
27072 if (!set)
27073 return;
27074 tmp = SET_DEST (set);
27075 if (REG_P (tmp))
27077 /* Add output dependency to the first function argument. */
27078 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27079 return;
27081 /* Add anti dependency. */
27082 add_dependence (first_arg, insn, REG_DEP_ANTI);
27085 /* Avoid cross-block motion of a function argument by adding a dependency
27086 from the first non-jump instruction in BB. */
27087 static void
27088 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
27090 rtx_insn *insn = BB_END (bb);
27092 while (insn)
27094 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
27096 rtx set = single_set (insn);
27097 if (set)
27099 avoid_func_arg_motion (arg, insn);
27100 return;
27103 if (insn == BB_HEAD (bb))
27104 return;
27105 insn = PREV_INSN (insn);
27109 /* Hook for pre-reload schedule - avoid motion of function arguments
27110 passed in likely spilled HW registers. */
27111 static void
27112 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
27114 rtx_insn *insn;
27115 rtx_insn *first_arg = NULL;
27116 if (reload_completed)
27117 return;
27118 while (head != tail && DEBUG_INSN_P (head))
27119 head = NEXT_INSN (head);
27120 for (insn = tail; insn != head; insn = PREV_INSN (insn))
27121 if (INSN_P (insn) && CALL_P (insn))
27123 first_arg = add_parameter_dependencies (insn, head);
27124 if (first_arg)
27126 /* Add a dependee for the first argument to predecessors, but only
27127 if the region contains more than one block. */
27128 basic_block bb = BLOCK_FOR_INSN (insn);
27129 int rgn = CONTAINING_RGN (bb->index);
27130 int nr_blks = RGN_NR_BLOCKS (rgn);
27131 /* Skip trivial regions and region head blocks that can have
27132 predecessors outside of region. */
27133 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
27135 edge e;
27136 edge_iterator ei;
27138 /* Regions are SCCs with the exception of selective
27139 scheduling with pipelining of outer blocks enabled.
27140 So also check that immediate predecessors of a non-head
27141 block are in the same region. */
27142 FOR_EACH_EDGE (e, ei, bb->preds)
27144 /* Avoid creating loop-carried dependencies by using
27145 the topological ordering in the region. */
27146 if (rgn == CONTAINING_RGN (e->src->index)
27147 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27148 add_dependee_for_func_arg (first_arg, e->src);
27151 insn = first_arg;
27152 if (insn == head)
27153 break;
27156 else if (first_arg)
27157 avoid_func_arg_motion (first_arg, insn);
27160 /* Hook for pre-reload schedule - set priority of moves from likely spilled
27161 HW registers to maximum, to schedule them as soon as possible. These are
27162 moves from function argument registers at the top of the function entry
27163 and moves from function return value registers after call. */
27164 static int
27165 ix86_adjust_priority (rtx_insn *insn, int priority)
27167 rtx set;
27169 if (reload_completed)
27170 return priority;
27172 if (!NONDEBUG_INSN_P (insn))
27173 return priority;
27175 set = single_set (insn);
27176 if (set)
27178 rtx tmp = SET_SRC (set);
27179 if (REG_P (tmp)
27180 && HARD_REGISTER_P (tmp)
27181 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27182 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27183 return current_sched_info->sched_max_insns_priority;
27186 return priority;
27189 /* Model decoder of Core 2/i7.
27190 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
27191 track the instruction fetch block boundaries and make sure that long
27192 (9+ bytes) instructions are assigned to D0. */
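/* For illustration only (using the parameter values set in
   ix86_sched_init_global below): the model assumes a 16-byte fetch block
   decoded into at most 6 insns per cycle, with only the first decoder able
   to take insns longer than 8 bytes.  So a 10-byte insn such as
   "movabsq $imm64, %rax" can only be the first insn of a decode group, and
   an insn that would push the running block length past 16 bytes is held
   back to the next cycle.  */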
27194 /* Maximum length of an insn that can be handled by
27195 a secondary decoder unit. '8' for Core 2/i7. */
27196 static int core2i7_secondary_decoder_max_insn_size;
27198 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27199 '16' for Core 2/i7. */
27200 static int core2i7_ifetch_block_size;
27202 /* Maximum number of instructions decoder can handle per cycle.
27203 '6' for Core 2/i7. */
27204 static int core2i7_ifetch_block_max_insns;
27206 typedef struct ix86_first_cycle_multipass_data_ *
27207 ix86_first_cycle_multipass_data_t;
27208 typedef const struct ix86_first_cycle_multipass_data_ *
27209 const_ix86_first_cycle_multipass_data_t;
27211 /* A variable to store target state across calls to max_issue within
27212 one cycle. */
27213 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27214 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27216 /* Initialize DATA. */
27217 static void
27218 core2i7_first_cycle_multipass_init (void *_data)
27220 ix86_first_cycle_multipass_data_t data
27221 = (ix86_first_cycle_multipass_data_t) _data;
27223 data->ifetch_block_len = 0;
27224 data->ifetch_block_n_insns = 0;
27225 data->ready_try_change = NULL;
27226 data->ready_try_change_size = 0;
27229 /* Advancing the cycle; reset ifetch block counts. */
27230 static void
27231 core2i7_dfa_post_advance_cycle (void)
27233 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27235 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27237 data->ifetch_block_len = 0;
27238 data->ifetch_block_n_insns = 0;
27241 static int min_insn_size (rtx_insn *);
27243 /* Filter out insns from ready_try that the core will not be able to issue
27244 on current cycle due to decoder. */
27245 static void
27246 core2i7_first_cycle_multipass_filter_ready_try
27247 (const_ix86_first_cycle_multipass_data_t data,
27248 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27250 while (n_ready--)
27252 rtx_insn *insn;
27253 int insn_size;
27255 if (ready_try[n_ready])
27256 continue;
27258 insn = get_ready_element (n_ready);
27259 insn_size = min_insn_size (insn);
27261 if (/* If this is too long an insn for a secondary decoder ... */
27262 (!first_cycle_insn_p
27263 && insn_size > core2i7_secondary_decoder_max_insn_size)
27264 /* ... or it would not fit into the ifetch block ... */
27265 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27266 /* ... or the decoder is full already ... */
27267 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27268 /* ... mask the insn out. */
27270 ready_try[n_ready] = 1;
27272 if (data->ready_try_change)
27273 bitmap_set_bit (data->ready_try_change, n_ready);
27278 /* Prepare for a new round of multipass lookahead scheduling. */
27279 static void
27280 core2i7_first_cycle_multipass_begin (void *_data,
27281 signed char *ready_try, int n_ready,
27282 bool first_cycle_insn_p)
27284 ix86_first_cycle_multipass_data_t data
27285 = (ix86_first_cycle_multipass_data_t) _data;
27286 const_ix86_first_cycle_multipass_data_t prev_data
27287 = ix86_first_cycle_multipass_data;
27289 /* Restore the state from the end of the previous round. */
27290 data->ifetch_block_len = prev_data->ifetch_block_len;
27291 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27293 /* Filter instructions that cannot be issued on current cycle due to
27294 decoder restrictions. */
27295 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27296 first_cycle_insn_p);
27299 /* INSN is being issued in current solution. Account for its impact on
27300 the decoder model. */
27301 static void
27302 core2i7_first_cycle_multipass_issue (void *_data,
27303 signed char *ready_try, int n_ready,
27304 rtx_insn *insn, const void *_prev_data)
27306 ix86_first_cycle_multipass_data_t data
27307 = (ix86_first_cycle_multipass_data_t) _data;
27308 const_ix86_first_cycle_multipass_data_t prev_data
27309 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27311 int insn_size = min_insn_size (insn);
27313 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27314 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27315 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27316 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27318 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27319 if (!data->ready_try_change)
27321 data->ready_try_change = sbitmap_alloc (n_ready);
27322 data->ready_try_change_size = n_ready;
27324 else if (data->ready_try_change_size < n_ready)
27326 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27327 n_ready, 0);
27328 data->ready_try_change_size = n_ready;
27330 bitmap_clear (data->ready_try_change);
27332 /* Filter out insns from ready_try that the core will not be able to issue
27333 on current cycle due to decoder. */
27334 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27335 false);
27338 /* Revert the effect on ready_try. */
27339 static void
27340 core2i7_first_cycle_multipass_backtrack (const void *_data,
27341 signed char *ready_try,
27342 int n_ready ATTRIBUTE_UNUSED)
27344 const_ix86_first_cycle_multipass_data_t data
27345 = (const_ix86_first_cycle_multipass_data_t) _data;
27346 unsigned int i = 0;
27347 sbitmap_iterator sbi;
27349 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27350 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27352 ready_try[i] = 0;
27356 /* Save the result of multipass lookahead scheduling for the next round. */
27357 static void
27358 core2i7_first_cycle_multipass_end (const void *_data)
27360 const_ix86_first_cycle_multipass_data_t data
27361 = (const_ix86_first_cycle_multipass_data_t) _data;
27362 ix86_first_cycle_multipass_data_t next_data
27363 = ix86_first_cycle_multipass_data;
27365 if (data != NULL)
27367 next_data->ifetch_block_len = data->ifetch_block_len;
27368 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27372 /* Deallocate target data. */
27373 static void
27374 core2i7_first_cycle_multipass_fini (void *_data)
27376 ix86_first_cycle_multipass_data_t data
27377 = (ix86_first_cycle_multipass_data_t) _data;
27379 if (data->ready_try_change)
27381 sbitmap_free (data->ready_try_change);
27382 data->ready_try_change = NULL;
27383 data->ready_try_change_size = 0;
27387 /* Prepare for scheduling pass. */
27388 static void
27389 ix86_sched_init_global (FILE *, int, int)
27391 /* Install scheduling hooks for current CPU. Some of these hooks are used
27392 in time-critical parts of the scheduler, so we only set them up when
27393 they are actually used. */
27394 switch (ix86_tune)
27396 case PROCESSOR_CORE2:
27397 case PROCESSOR_NEHALEM:
27398 case PROCESSOR_SANDYBRIDGE:
27399 case PROCESSOR_HASWELL:
27400 /* Do not perform multipass scheduling for pre-reload schedule
27401 to save compile time. */
27402 if (reload_completed)
27404 targetm.sched.dfa_post_advance_cycle
27405 = core2i7_dfa_post_advance_cycle;
27406 targetm.sched.first_cycle_multipass_init
27407 = core2i7_first_cycle_multipass_init;
27408 targetm.sched.first_cycle_multipass_begin
27409 = core2i7_first_cycle_multipass_begin;
27410 targetm.sched.first_cycle_multipass_issue
27411 = core2i7_first_cycle_multipass_issue;
27412 targetm.sched.first_cycle_multipass_backtrack
27413 = core2i7_first_cycle_multipass_backtrack;
27414 targetm.sched.first_cycle_multipass_end
27415 = core2i7_first_cycle_multipass_end;
27416 targetm.sched.first_cycle_multipass_fini
27417 = core2i7_first_cycle_multipass_fini;
27419 /* Set decoder parameters. */
27420 core2i7_secondary_decoder_max_insn_size = 8;
27421 core2i7_ifetch_block_size = 16;
27422 core2i7_ifetch_block_max_insns = 6;
27423 break;
27425 /* ... Fall through ... */
27426 default:
27427 targetm.sched.dfa_post_advance_cycle = NULL;
27428 targetm.sched.first_cycle_multipass_init = NULL;
27429 targetm.sched.first_cycle_multipass_begin = NULL;
27430 targetm.sched.first_cycle_multipass_issue = NULL;
27431 targetm.sched.first_cycle_multipass_backtrack = NULL;
27432 targetm.sched.first_cycle_multipass_end = NULL;
27433 targetm.sched.first_cycle_multipass_fini = NULL;
27434 break;
27439 /* Compute the alignment given to a constant that is being placed in memory.
27440 EXP is the constant and ALIGN is the alignment that the object would
27441 ordinarily have.
27442 The value of this function is used instead of that alignment to align
27443 the object. */
27446 ix86_constant_alignment (tree exp, int align)
27448 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27449 || TREE_CODE (exp) == INTEGER_CST)
27451 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27452 return 64;
27453 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27454 return 128;
27456 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27457 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27458 return BITS_PER_WORD;
27460 return align;
27463 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
27464 the data type, and ALIGN is the alignment that the object would
27465 ordinarily have. */
27467 static int
27468 iamcu_alignment (tree type, int align)
27470 enum machine_mode mode;
27472 if (align < 32 || TYPE_USER_ALIGN (type))
27473 return align;
27475 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
27476 bytes. */
27477 mode = TYPE_MODE (strip_array_types (type));
27478 switch (GET_MODE_CLASS (mode))
27480 case MODE_INT:
27481 case MODE_COMPLEX_INT:
27482 case MODE_COMPLEX_FLOAT:
27483 case MODE_FLOAT:
27484 case MODE_DECIMAL_FLOAT:
27485 return 32;
27486 default:
27487 return align;
27491 /* Compute the alignment for a static variable.
27492 TYPE is the data type, and ALIGN is the alignment that
27493 the object would ordinarily have. The value of this function is used
27494 instead of that alignment to align the object. */
27497 ix86_data_alignment (tree type, int align, bool opt)
27499 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27500 for symbols from other compilation units or symbols that don't need
27501 to bind locally. In order to preserve some ABI compatibility with
27502 those compilers, ensure we don't decrease alignment from what we
27503 used to assume. */
27505 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27507 /* A data structure equal to or greater than the size of a cache line
27508 (64 bytes in the Pentium 4 and other recent Intel processors, including
27509 processors based on the Intel Core microarchitecture) should be aligned
27510 so that its base address is a multiple of the cache line size. */
27512 int max_align
27513 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27515 if (max_align < BITS_PER_WORD)
27516 max_align = BITS_PER_WORD;
27518 switch (ix86_align_data_type)
27520 case ix86_align_data_type_abi: opt = false; break;
27521 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27522 case ix86_align_data_type_cacheline: break;
27525 if (TARGET_IAMCU)
27526 align = iamcu_alignment (type, align);
27528 if (opt
27529 && AGGREGATE_TYPE_P (type)
27530 && TYPE_SIZE (type)
27531 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27533 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27534 && align < max_align_compat)
27535 align = max_align_compat;
27536 if (wi::geu_p (TYPE_SIZE (type), max_align)
27537 && align < max_align)
27538 align = max_align;
27541 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27542 to a 16-byte boundary. */
27543 if (TARGET_64BIT)
27545 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27546 && TYPE_SIZE (type)
27547 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27548 && wi::geu_p (TYPE_SIZE (type), 128)
27549 && align < 128)
27550 return 128;
27553 if (!opt)
27554 return align;
27556 if (TREE_CODE (type) == ARRAY_TYPE)
27558 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27559 return 64;
27560 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27561 return 128;
27563 else if (TREE_CODE (type) == COMPLEX_TYPE)
27566 if (TYPE_MODE (type) == DCmode && align < 64)
27567 return 64;
27568 if ((TYPE_MODE (type) == XCmode
27569 || TYPE_MODE (type) == TCmode) && align < 128)
27570 return 128;
27572 else if ((TREE_CODE (type) == RECORD_TYPE
27573 || TREE_CODE (type) == UNION_TYPE
27574 || TREE_CODE (type) == QUAL_UNION_TYPE)
27575 && TYPE_FIELDS (type))
27577 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27578 return 64;
27579 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27580 return 128;
27582 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27583 || TREE_CODE (type) == INTEGER_TYPE)
27585 if (TYPE_MODE (type) == DFmode && align < 64)
27586 return 64;
27587 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27588 return 128;
27591 return align;
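/* Two illustrative consequences of the rules above (assumed declarations,
   for illustration only): on x86-64, "static char buf[32];" is given at
   least 128-bit alignment by the ABI array rule, and a large aggregate such
   as "static struct { double d[16]; } t;" may additionally be raised to the
   cache-line-derived maximum when optimizing.  */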
27594 /* Compute the alignment for a local variable or a stack slot. EXP is
27595 the data type or decl itself, MODE is the widest mode available and
27596 ALIGN is the alignment that the object would ordinarily have. The
27597 value of this macro is used instead of that alignment to align the
27598 object. */
27600 unsigned int
27601 ix86_local_alignment (tree exp, machine_mode mode,
27602 unsigned int align)
27604 tree type, decl;
27606 if (exp && DECL_P (exp))
27608 type = TREE_TYPE (exp);
27609 decl = exp;
27611 else
27613 type = exp;
27614 decl = NULL;
27617 /* Don't do dynamic stack realignment for long long objects with
27618 -mpreferred-stack-boundary=2. */
27619 if (!TARGET_64BIT
27620 && align == 64
27621 && ix86_preferred_stack_boundary < 64
27622 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27623 && (!type || !TYPE_USER_ALIGN (type))
27624 && (!decl || !DECL_USER_ALIGN (decl)))
27625 align = 32;
27627 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27628 register in MODE. We will return the largest alignment of XF
27629 and DF. */
27630 if (!type)
27632 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27633 align = GET_MODE_ALIGNMENT (DFmode);
27634 return align;
27637 /* Don't increase alignment for Intel MCU psABI. */
27638 if (TARGET_IAMCU)
27639 return align;
27641 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27642 to a 16-byte boundary. The exact wording is:
27644 An array uses the same alignment as its elements, except that a local or
27645 global array variable of length at least 16 bytes or
27646 a C99 variable-length array variable always has alignment of at least 16 bytes.
27648 This was added to allow use of aligned SSE instructions on arrays. The
27649 rule is meant for static storage (where the compiler cannot do the analysis
27650 by itself). We follow it for automatic variables only when convenient.
27651 We fully control everything in the function being compiled, and functions
27652 from other units cannot rely on the alignment.
27654 Exclude the va_list type. It is the common case of a local array where
27655 we cannot benefit from the alignment.
27657 TODO: Probably one should optimize for size only when the variable is not escaping. */
27658 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27659 && TARGET_SSE)
27661 if (AGGREGATE_TYPE_P (type)
27662 && (va_list_type_node == NULL_TREE
27663 || (TYPE_MAIN_VARIANT (type)
27664 != TYPE_MAIN_VARIANT (va_list_type_node)))
27665 && TYPE_SIZE (type)
27666 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27667 && wi::geu_p (TYPE_SIZE (type), 16)
27668 && align < 128)
27669 return 128;
27671 if (TREE_CODE (type) == ARRAY_TYPE)
27673 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27674 return 64;
27675 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27676 return 128;
27678 else if (TREE_CODE (type) == COMPLEX_TYPE)
27680 if (TYPE_MODE (type) == DCmode && align < 64)
27681 return 64;
27682 if ((TYPE_MODE (type) == XCmode
27683 || TYPE_MODE (type) == TCmode) && align < 128)
27684 return 128;
27686 else if ((TREE_CODE (type) == RECORD_TYPE
27687 || TREE_CODE (type) == UNION_TYPE
27688 || TREE_CODE (type) == QUAL_UNION_TYPE)
27689 && TYPE_FIELDS (type))
27691 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27692 return 64;
27693 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27694 return 128;
27696 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27697 || TREE_CODE (type) == INTEGER_TYPE)
27700 if (TYPE_MODE (type) == DFmode && align < 64)
27701 return 64;
27702 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27703 return 128;
27705 return align;
27708 /* Compute the minimum required alignment for dynamic stack realignment
27709 purposes for a local variable, parameter or a stack slot. EXP is
27710 the data type or decl itself, MODE is its mode and ALIGN is the
27711 alignment that the object would ordinarily have. */
27713 unsigned int
27714 ix86_minimum_alignment (tree exp, machine_mode mode,
27715 unsigned int align)
27717 tree type, decl;
27719 if (exp && DECL_P (exp))
27721 type = TREE_TYPE (exp);
27722 decl = exp;
27724 else
27726 type = exp;
27727 decl = NULL;
27730 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27731 return align;
27733 /* Don't do dynamic stack realignment for long long objects with
27734 -mpreferred-stack-boundary=2. */
27735 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27736 && (!type || !TYPE_USER_ALIGN (type))
27737 && (!decl || !DECL_USER_ALIGN (decl)))
27738 return 32;
27740 return align;
27743 /* Find a location for the static chain incoming to a nested function.
27744 This is a register, unless all free registers are used by arguments. */
27746 static rtx
27747 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27749 unsigned regno;
27751 /* While this function won't be called by the middle-end when a static
27752 chain isn't needed, it's also used throughout the backend so it's
27753 easiest to keep this check centralized. */
27754 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27755 return NULL;
27757 if (TARGET_64BIT)
27759 /* We always use R10 in 64-bit mode. */
27760 regno = R10_REG;
27762 else
27764 const_tree fntype, fndecl;
27765 unsigned int ccvt;
27767 /* By default in 32-bit mode we use ECX to pass the static chain. */
27768 regno = CX_REG;
27770 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27772 fntype = TREE_TYPE (fndecl_or_type);
27773 fndecl = fndecl_or_type;
27775 else
27777 fntype = fndecl_or_type;
27778 fndecl = NULL;
27781 ccvt = ix86_get_callcvt (fntype);
27782 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27784 /* Fastcall functions use ecx/edx for arguments, which leaves
27785 us with EAX for the static chain.
27786 Thiscall functions use ecx for arguments, which also
27787 leaves us with EAX for the static chain. */
27788 regno = AX_REG;
27790 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27792 /* Thiscall functions use ecx for arguments, which leaves
27793 us with EAX and EDX for the static chain.
27794 For ABI compatibility we use EAX. */
27795 regno = AX_REG;
27797 else if (ix86_function_regparm (fntype, fndecl) == 3)
27799 /* For regparm 3, we have no free call-clobbered registers in
27800 which to store the static chain. In order to implement this,
27801 we have the trampoline push the static chain to the stack.
27802 However, we can't push a value below the return address when
27803 we call the nested function directly, so we have to use an
27804 alternate entry point. For this we use ESI, and have the
27805 alternate entry point push ESI, so that things appear the
27806 same once we're executing the nested function. */
27807 if (incoming_p)
27809 if (fndecl == current_function_decl)
27810 ix86_static_chain_on_stack = true;
27811 return gen_frame_mem (SImode,
27812 plus_constant (Pmode,
27813 arg_pointer_rtx, -8));
27815 regno = SI_REG;
27819 return gen_rtx_REG (Pmode, regno);
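/* Summary of the choices above, for illustration: 64-bit code always gets
   %r10; 32-bit code normally gets %ecx; fastcall/thiscall functions get
   %eax; and for regparm(3) functions the chain lives in a stack slot, with
   the alternate entry point pushing %esi so the nested function still sees
   a consistent frame.  */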
27822 /* Emit RTL insns to initialize the variable parts of a trampoline.
27823 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27824 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27825 to be passed to the target function. */
27827 static void
27828 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27830 rtx mem, fnaddr;
27831 int opcode;
27832 int offset = 0;
27834 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27836 if (TARGET_64BIT)
27838 int size;
27840 /* Load the function address into r11. Try to load the address using
27841 the shorter movl instead of movabs. We may want to support
27842 movq for kernel mode, but the kernel does not use trampolines at
27843 the moment. FNADDR is a 32-bit address and may not be in
27844 DImode when ptr_mode == SImode. Always use movl in this
27845 case. */
27846 if (ptr_mode == SImode
27847 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27849 fnaddr = copy_addr_to_reg (fnaddr);
27851 mem = adjust_address (m_tramp, HImode, offset);
27852 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27854 mem = adjust_address (m_tramp, SImode, offset + 2);
27855 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27856 offset += 6;
27858 else
27860 mem = adjust_address (m_tramp, HImode, offset);
27861 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27863 mem = adjust_address (m_tramp, DImode, offset + 2);
27864 emit_move_insn (mem, fnaddr);
27865 offset += 10;
27868 /* Load static chain using movabs to r10. Use the shorter movl
27869 instead of movabs when ptr_mode == SImode. */
27870 if (ptr_mode == SImode)
27872 opcode = 0xba41;
27873 size = 6;
27875 else
27877 opcode = 0xba49;
27878 size = 10;
27881 mem = adjust_address (m_tramp, HImode, offset);
27882 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27884 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27885 emit_move_insn (mem, chain_value);
27886 offset += size;
27888 /* Jump to r11; the last (unused) byte is a nop, only there to
27889 pad the write out to a single 32-bit store. */
27890 mem = adjust_address (m_tramp, SImode, offset);
27891 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27892 offset += 4;
27894 else
27896 rtx disp, chain;
27898 /* Depending on the static chain location, either load a register
27899 with a constant, or push the constant to the stack. All of the
27900 instructions are the same size. */
27901 chain = ix86_static_chain (fndecl, true);
27902 if (REG_P (chain))
27904 switch (REGNO (chain))
27906 case AX_REG:
27907 opcode = 0xb8; break;
27908 case CX_REG:
27909 opcode = 0xb9; break;
27910 default:
27911 gcc_unreachable ();
27914 else
27915 opcode = 0x68;
27917 mem = adjust_address (m_tramp, QImode, offset);
27918 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27920 mem = adjust_address (m_tramp, SImode, offset + 1);
27921 emit_move_insn (mem, chain_value);
27922 offset += 5;
27924 mem = adjust_address (m_tramp, QImode, offset);
27925 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27927 mem = adjust_address (m_tramp, SImode, offset + 1);
27929 /* Compute offset from the end of the jmp to the target function.
27930 In the case in which the trampoline stores the static chain on
27931 the stack, we need to skip the first insn which pushes the
27932 (call-saved) register static chain; this push is 1 byte. */
27933 offset += 5;
27934 disp = expand_binop (SImode, sub_optab, fnaddr,
27935 plus_constant (Pmode, XEXP (m_tramp, 0),
27936 offset - (MEM_P (chain) ? 1 : 0)),
27937 NULL_RTX, 1, OPTAB_DIRECT);
27938 emit_move_insn (mem, disp);
27941 gcc_assert (offset <= TRAMPOLINE_SIZE);
27943 #ifdef HAVE_ENABLE_EXECUTE_STACK
27944 #ifdef CHECK_EXECUTE_STACK_ENABLED
27945 if (CHECK_EXECUTE_STACK_ENABLED)
27946 #endif
27947 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27948 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27949 #endif
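/* For reference, the 64-bit trampoline written above appears to decode as
   (illustration only, assuming the movabs forms are chosen):

       49 bb <imm64>      movabs $fnaddr, %r11
       49 ba <imm64>      movabs $chain,  %r10
       49 ff e3           jmp    *%r11
       90                 nop (padding for the final 32-bit store)

   while the 32-bit variant is a "movl $chain, %ecx" (or %eax, or a
   "pushl $chain") followed by a "jmp rel32" to the target.  */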
27952 /* The following file contains several enumerations and data structures
27953 built from the definitions in i386-builtin-types.def. */
27955 #include "i386-builtin-types.inc"
27957 /* Table for the ix86 builtin non-function types. */
27958 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27960 /* Retrieve an element from the above table, building some of
27961 the types lazily. */
27963 static tree
27964 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27966 unsigned int index;
27967 tree type, itype;
27969 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27971 type = ix86_builtin_type_tab[(int) tcode];
27972 if (type != NULL)
27973 return type;
27975 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27976 if (tcode <= IX86_BT_LAST_VECT)
27978 machine_mode mode;
27980 index = tcode - IX86_BT_LAST_PRIM - 1;
27981 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27982 mode = ix86_builtin_type_vect_mode[index];
27984 type = build_vector_type_for_mode (itype, mode);
27986 else
27988 int quals;
27990 index = tcode - IX86_BT_LAST_VECT - 1;
27991 if (tcode <= IX86_BT_LAST_PTR)
27992 quals = TYPE_UNQUALIFIED;
27993 else
27994 quals = TYPE_QUAL_CONST;
27996 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27997 if (quals != TYPE_UNQUALIFIED)
27998 itype = build_qualified_type (itype, quals);
28000 type = build_pointer_type (itype);
28003 ix86_builtin_type_tab[(int) tcode] = type;
28004 return type;
28007 /* Table for the ix86 builtin function types. */
28008 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
28010 /* Retrieve an element from the above table, building some of
28011 the types lazily. */
28013 static tree
28014 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
28016 tree type;
28018 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
28020 type = ix86_builtin_func_type_tab[(int) tcode];
28021 if (type != NULL)
28022 return type;
28024 if (tcode <= IX86_BT_LAST_FUNC)
28026 unsigned start = ix86_builtin_func_start[(int) tcode];
28027 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
28028 tree rtype, atype, args = void_list_node;
28029 unsigned i;
28031 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
28032 for (i = after - 1; i > start; --i)
28034 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
28035 args = tree_cons (NULL, atype, args);
28038 type = build_function_type (rtype, args);
28040 else
28042 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
28043 enum ix86_builtin_func_type icode;
28045 icode = ix86_builtin_func_alias_base[index];
28046 type = ix86_get_builtin_func_type (icode);
28049 ix86_builtin_func_type_tab[(int) tcode] = type;
28050 return type;
28054 /* Codes for all the SSE/MMX builtins. */
28055 enum ix86_builtins
28057 IX86_BUILTIN_ADDPS,
28058 IX86_BUILTIN_ADDSS,
28059 IX86_BUILTIN_DIVPS,
28060 IX86_BUILTIN_DIVSS,
28061 IX86_BUILTIN_MULPS,
28062 IX86_BUILTIN_MULSS,
28063 IX86_BUILTIN_SUBPS,
28064 IX86_BUILTIN_SUBSS,
28066 IX86_BUILTIN_CMPEQPS,
28067 IX86_BUILTIN_CMPLTPS,
28068 IX86_BUILTIN_CMPLEPS,
28069 IX86_BUILTIN_CMPGTPS,
28070 IX86_BUILTIN_CMPGEPS,
28071 IX86_BUILTIN_CMPNEQPS,
28072 IX86_BUILTIN_CMPNLTPS,
28073 IX86_BUILTIN_CMPNLEPS,
28074 IX86_BUILTIN_CMPNGTPS,
28075 IX86_BUILTIN_CMPNGEPS,
28076 IX86_BUILTIN_CMPORDPS,
28077 IX86_BUILTIN_CMPUNORDPS,
28078 IX86_BUILTIN_CMPEQSS,
28079 IX86_BUILTIN_CMPLTSS,
28080 IX86_BUILTIN_CMPLESS,
28081 IX86_BUILTIN_CMPNEQSS,
28082 IX86_BUILTIN_CMPNLTSS,
28083 IX86_BUILTIN_CMPNLESS,
28084 IX86_BUILTIN_CMPORDSS,
28085 IX86_BUILTIN_CMPUNORDSS,
28087 IX86_BUILTIN_COMIEQSS,
28088 IX86_BUILTIN_COMILTSS,
28089 IX86_BUILTIN_COMILESS,
28090 IX86_BUILTIN_COMIGTSS,
28091 IX86_BUILTIN_COMIGESS,
28092 IX86_BUILTIN_COMINEQSS,
28093 IX86_BUILTIN_UCOMIEQSS,
28094 IX86_BUILTIN_UCOMILTSS,
28095 IX86_BUILTIN_UCOMILESS,
28096 IX86_BUILTIN_UCOMIGTSS,
28097 IX86_BUILTIN_UCOMIGESS,
28098 IX86_BUILTIN_UCOMINEQSS,
28100 IX86_BUILTIN_CVTPI2PS,
28101 IX86_BUILTIN_CVTPS2PI,
28102 IX86_BUILTIN_CVTSI2SS,
28103 IX86_BUILTIN_CVTSI642SS,
28104 IX86_BUILTIN_CVTSS2SI,
28105 IX86_BUILTIN_CVTSS2SI64,
28106 IX86_BUILTIN_CVTTPS2PI,
28107 IX86_BUILTIN_CVTTSS2SI,
28108 IX86_BUILTIN_CVTTSS2SI64,
28110 IX86_BUILTIN_MAXPS,
28111 IX86_BUILTIN_MAXSS,
28112 IX86_BUILTIN_MINPS,
28113 IX86_BUILTIN_MINSS,
28115 IX86_BUILTIN_LOADUPS,
28116 IX86_BUILTIN_STOREUPS,
28117 IX86_BUILTIN_MOVSS,
28119 IX86_BUILTIN_MOVHLPS,
28120 IX86_BUILTIN_MOVLHPS,
28121 IX86_BUILTIN_LOADHPS,
28122 IX86_BUILTIN_LOADLPS,
28123 IX86_BUILTIN_STOREHPS,
28124 IX86_BUILTIN_STORELPS,
28126 IX86_BUILTIN_MASKMOVQ,
28127 IX86_BUILTIN_MOVMSKPS,
28128 IX86_BUILTIN_PMOVMSKB,
28130 IX86_BUILTIN_MOVNTPS,
28131 IX86_BUILTIN_MOVNTQ,
28133 IX86_BUILTIN_LOADDQU,
28134 IX86_BUILTIN_STOREDQU,
28136 IX86_BUILTIN_PACKSSWB,
28137 IX86_BUILTIN_PACKSSDW,
28138 IX86_BUILTIN_PACKUSWB,
28140 IX86_BUILTIN_PADDB,
28141 IX86_BUILTIN_PADDW,
28142 IX86_BUILTIN_PADDD,
28143 IX86_BUILTIN_PADDQ,
28144 IX86_BUILTIN_PADDSB,
28145 IX86_BUILTIN_PADDSW,
28146 IX86_BUILTIN_PADDUSB,
28147 IX86_BUILTIN_PADDUSW,
28148 IX86_BUILTIN_PSUBB,
28149 IX86_BUILTIN_PSUBW,
28150 IX86_BUILTIN_PSUBD,
28151 IX86_BUILTIN_PSUBQ,
28152 IX86_BUILTIN_PSUBSB,
28153 IX86_BUILTIN_PSUBSW,
28154 IX86_BUILTIN_PSUBUSB,
28155 IX86_BUILTIN_PSUBUSW,
28157 IX86_BUILTIN_PAND,
28158 IX86_BUILTIN_PANDN,
28159 IX86_BUILTIN_POR,
28160 IX86_BUILTIN_PXOR,
28162 IX86_BUILTIN_PAVGB,
28163 IX86_BUILTIN_PAVGW,
28165 IX86_BUILTIN_PCMPEQB,
28166 IX86_BUILTIN_PCMPEQW,
28167 IX86_BUILTIN_PCMPEQD,
28168 IX86_BUILTIN_PCMPGTB,
28169 IX86_BUILTIN_PCMPGTW,
28170 IX86_BUILTIN_PCMPGTD,
28172 IX86_BUILTIN_PMADDWD,
28174 IX86_BUILTIN_PMAXSW,
28175 IX86_BUILTIN_PMAXUB,
28176 IX86_BUILTIN_PMINSW,
28177 IX86_BUILTIN_PMINUB,
28179 IX86_BUILTIN_PMULHUW,
28180 IX86_BUILTIN_PMULHW,
28181 IX86_BUILTIN_PMULLW,
28183 IX86_BUILTIN_PSADBW,
28184 IX86_BUILTIN_PSHUFW,
28186 IX86_BUILTIN_PSLLW,
28187 IX86_BUILTIN_PSLLD,
28188 IX86_BUILTIN_PSLLQ,
28189 IX86_BUILTIN_PSRAW,
28190 IX86_BUILTIN_PSRAD,
28191 IX86_BUILTIN_PSRLW,
28192 IX86_BUILTIN_PSRLD,
28193 IX86_BUILTIN_PSRLQ,
28194 IX86_BUILTIN_PSLLWI,
28195 IX86_BUILTIN_PSLLDI,
28196 IX86_BUILTIN_PSLLQI,
28197 IX86_BUILTIN_PSRAWI,
28198 IX86_BUILTIN_PSRADI,
28199 IX86_BUILTIN_PSRLWI,
28200 IX86_BUILTIN_PSRLDI,
28201 IX86_BUILTIN_PSRLQI,
28203 IX86_BUILTIN_PUNPCKHBW,
28204 IX86_BUILTIN_PUNPCKHWD,
28205 IX86_BUILTIN_PUNPCKHDQ,
28206 IX86_BUILTIN_PUNPCKLBW,
28207 IX86_BUILTIN_PUNPCKLWD,
28208 IX86_BUILTIN_PUNPCKLDQ,
28210 IX86_BUILTIN_SHUFPS,
28212 IX86_BUILTIN_RCPPS,
28213 IX86_BUILTIN_RCPSS,
28214 IX86_BUILTIN_RSQRTPS,
28215 IX86_BUILTIN_RSQRTPS_NR,
28216 IX86_BUILTIN_RSQRTSS,
28217 IX86_BUILTIN_RSQRTF,
28218 IX86_BUILTIN_SQRTPS,
28219 IX86_BUILTIN_SQRTPS_NR,
28220 IX86_BUILTIN_SQRTSS,
28222 IX86_BUILTIN_UNPCKHPS,
28223 IX86_BUILTIN_UNPCKLPS,
28225 IX86_BUILTIN_ANDPS,
28226 IX86_BUILTIN_ANDNPS,
28227 IX86_BUILTIN_ORPS,
28228 IX86_BUILTIN_XORPS,
28230 IX86_BUILTIN_EMMS,
28231 IX86_BUILTIN_LDMXCSR,
28232 IX86_BUILTIN_STMXCSR,
28233 IX86_BUILTIN_SFENCE,
28235 IX86_BUILTIN_FXSAVE,
28236 IX86_BUILTIN_FXRSTOR,
28237 IX86_BUILTIN_FXSAVE64,
28238 IX86_BUILTIN_FXRSTOR64,
28240 IX86_BUILTIN_XSAVE,
28241 IX86_BUILTIN_XRSTOR,
28242 IX86_BUILTIN_XSAVE64,
28243 IX86_BUILTIN_XRSTOR64,
28245 IX86_BUILTIN_XSAVEOPT,
28246 IX86_BUILTIN_XSAVEOPT64,
28248 IX86_BUILTIN_XSAVEC,
28249 IX86_BUILTIN_XSAVEC64,
28251 IX86_BUILTIN_XSAVES,
28252 IX86_BUILTIN_XRSTORS,
28253 IX86_BUILTIN_XSAVES64,
28254 IX86_BUILTIN_XRSTORS64,
28256 /* 3DNow! Original */
28257 IX86_BUILTIN_FEMMS,
28258 IX86_BUILTIN_PAVGUSB,
28259 IX86_BUILTIN_PF2ID,
28260 IX86_BUILTIN_PFACC,
28261 IX86_BUILTIN_PFADD,
28262 IX86_BUILTIN_PFCMPEQ,
28263 IX86_BUILTIN_PFCMPGE,
28264 IX86_BUILTIN_PFCMPGT,
28265 IX86_BUILTIN_PFMAX,
28266 IX86_BUILTIN_PFMIN,
28267 IX86_BUILTIN_PFMUL,
28268 IX86_BUILTIN_PFRCP,
28269 IX86_BUILTIN_PFRCPIT1,
28270 IX86_BUILTIN_PFRCPIT2,
28271 IX86_BUILTIN_PFRSQIT1,
28272 IX86_BUILTIN_PFRSQRT,
28273 IX86_BUILTIN_PFSUB,
28274 IX86_BUILTIN_PFSUBR,
28275 IX86_BUILTIN_PI2FD,
28276 IX86_BUILTIN_PMULHRW,
28278 /* 3DNow! Athlon Extensions */
28279 IX86_BUILTIN_PF2IW,
28280 IX86_BUILTIN_PFNACC,
28281 IX86_BUILTIN_PFPNACC,
28282 IX86_BUILTIN_PI2FW,
28283 IX86_BUILTIN_PSWAPDSI,
28284 IX86_BUILTIN_PSWAPDSF,
28286 /* SSE2 */
28287 IX86_BUILTIN_ADDPD,
28288 IX86_BUILTIN_ADDSD,
28289 IX86_BUILTIN_DIVPD,
28290 IX86_BUILTIN_DIVSD,
28291 IX86_BUILTIN_MULPD,
28292 IX86_BUILTIN_MULSD,
28293 IX86_BUILTIN_SUBPD,
28294 IX86_BUILTIN_SUBSD,
28296 IX86_BUILTIN_CMPEQPD,
28297 IX86_BUILTIN_CMPLTPD,
28298 IX86_BUILTIN_CMPLEPD,
28299 IX86_BUILTIN_CMPGTPD,
28300 IX86_BUILTIN_CMPGEPD,
28301 IX86_BUILTIN_CMPNEQPD,
28302 IX86_BUILTIN_CMPNLTPD,
28303 IX86_BUILTIN_CMPNLEPD,
28304 IX86_BUILTIN_CMPNGTPD,
28305 IX86_BUILTIN_CMPNGEPD,
28306 IX86_BUILTIN_CMPORDPD,
28307 IX86_BUILTIN_CMPUNORDPD,
28308 IX86_BUILTIN_CMPEQSD,
28309 IX86_BUILTIN_CMPLTSD,
28310 IX86_BUILTIN_CMPLESD,
28311 IX86_BUILTIN_CMPNEQSD,
28312 IX86_BUILTIN_CMPNLTSD,
28313 IX86_BUILTIN_CMPNLESD,
28314 IX86_BUILTIN_CMPORDSD,
28315 IX86_BUILTIN_CMPUNORDSD,
28317 IX86_BUILTIN_COMIEQSD,
28318 IX86_BUILTIN_COMILTSD,
28319 IX86_BUILTIN_COMILESD,
28320 IX86_BUILTIN_COMIGTSD,
28321 IX86_BUILTIN_COMIGESD,
28322 IX86_BUILTIN_COMINEQSD,
28323 IX86_BUILTIN_UCOMIEQSD,
28324 IX86_BUILTIN_UCOMILTSD,
28325 IX86_BUILTIN_UCOMILESD,
28326 IX86_BUILTIN_UCOMIGTSD,
28327 IX86_BUILTIN_UCOMIGESD,
28328 IX86_BUILTIN_UCOMINEQSD,
28330 IX86_BUILTIN_MAXPD,
28331 IX86_BUILTIN_MAXSD,
28332 IX86_BUILTIN_MINPD,
28333 IX86_BUILTIN_MINSD,
28335 IX86_BUILTIN_ANDPD,
28336 IX86_BUILTIN_ANDNPD,
28337 IX86_BUILTIN_ORPD,
28338 IX86_BUILTIN_XORPD,
28340 IX86_BUILTIN_SQRTPD,
28341 IX86_BUILTIN_SQRTSD,
28343 IX86_BUILTIN_UNPCKHPD,
28344 IX86_BUILTIN_UNPCKLPD,
28346 IX86_BUILTIN_SHUFPD,
28348 IX86_BUILTIN_LOADUPD,
28349 IX86_BUILTIN_STOREUPD,
28350 IX86_BUILTIN_MOVSD,
28352 IX86_BUILTIN_LOADHPD,
28353 IX86_BUILTIN_LOADLPD,
28355 IX86_BUILTIN_CVTDQ2PD,
28356 IX86_BUILTIN_CVTDQ2PS,
28358 IX86_BUILTIN_CVTPD2DQ,
28359 IX86_BUILTIN_CVTPD2PI,
28360 IX86_BUILTIN_CVTPD2PS,
28361 IX86_BUILTIN_CVTTPD2DQ,
28362 IX86_BUILTIN_CVTTPD2PI,
28364 IX86_BUILTIN_CVTPI2PD,
28365 IX86_BUILTIN_CVTSI2SD,
28366 IX86_BUILTIN_CVTSI642SD,
28368 IX86_BUILTIN_CVTSD2SI,
28369 IX86_BUILTIN_CVTSD2SI64,
28370 IX86_BUILTIN_CVTSD2SS,
28371 IX86_BUILTIN_CVTSS2SD,
28372 IX86_BUILTIN_CVTTSD2SI,
28373 IX86_BUILTIN_CVTTSD2SI64,
28375 IX86_BUILTIN_CVTPS2DQ,
28376 IX86_BUILTIN_CVTPS2PD,
28377 IX86_BUILTIN_CVTTPS2DQ,
28379 IX86_BUILTIN_MOVNTI,
28380 IX86_BUILTIN_MOVNTI64,
28381 IX86_BUILTIN_MOVNTPD,
28382 IX86_BUILTIN_MOVNTDQ,
28384 IX86_BUILTIN_MOVQ128,
28386 /* SSE2 MMX */
28387 IX86_BUILTIN_MASKMOVDQU,
28388 IX86_BUILTIN_MOVMSKPD,
28389 IX86_BUILTIN_PMOVMSKB128,
28391 IX86_BUILTIN_PACKSSWB128,
28392 IX86_BUILTIN_PACKSSDW128,
28393 IX86_BUILTIN_PACKUSWB128,
28395 IX86_BUILTIN_PADDB128,
28396 IX86_BUILTIN_PADDW128,
28397 IX86_BUILTIN_PADDD128,
28398 IX86_BUILTIN_PADDQ128,
28399 IX86_BUILTIN_PADDSB128,
28400 IX86_BUILTIN_PADDSW128,
28401 IX86_BUILTIN_PADDUSB128,
28402 IX86_BUILTIN_PADDUSW128,
28403 IX86_BUILTIN_PSUBB128,
28404 IX86_BUILTIN_PSUBW128,
28405 IX86_BUILTIN_PSUBD128,
28406 IX86_BUILTIN_PSUBQ128,
28407 IX86_BUILTIN_PSUBSB128,
28408 IX86_BUILTIN_PSUBSW128,
28409 IX86_BUILTIN_PSUBUSB128,
28410 IX86_BUILTIN_PSUBUSW128,
28412 IX86_BUILTIN_PAND128,
28413 IX86_BUILTIN_PANDN128,
28414 IX86_BUILTIN_POR128,
28415 IX86_BUILTIN_PXOR128,
28417 IX86_BUILTIN_PAVGB128,
28418 IX86_BUILTIN_PAVGW128,
28420 IX86_BUILTIN_PCMPEQB128,
28421 IX86_BUILTIN_PCMPEQW128,
28422 IX86_BUILTIN_PCMPEQD128,
28423 IX86_BUILTIN_PCMPGTB128,
28424 IX86_BUILTIN_PCMPGTW128,
28425 IX86_BUILTIN_PCMPGTD128,
28427 IX86_BUILTIN_PMADDWD128,
28429 IX86_BUILTIN_PMAXSW128,
28430 IX86_BUILTIN_PMAXUB128,
28431 IX86_BUILTIN_PMINSW128,
28432 IX86_BUILTIN_PMINUB128,
28434 IX86_BUILTIN_PMULUDQ,
28435 IX86_BUILTIN_PMULUDQ128,
28436 IX86_BUILTIN_PMULHUW128,
28437 IX86_BUILTIN_PMULHW128,
28438 IX86_BUILTIN_PMULLW128,
28440 IX86_BUILTIN_PSADBW128,
28441 IX86_BUILTIN_PSHUFHW,
28442 IX86_BUILTIN_PSHUFLW,
28443 IX86_BUILTIN_PSHUFD,
28445 IX86_BUILTIN_PSLLDQI128,
28446 IX86_BUILTIN_PSLLWI128,
28447 IX86_BUILTIN_PSLLDI128,
28448 IX86_BUILTIN_PSLLQI128,
28449 IX86_BUILTIN_PSRAWI128,
28450 IX86_BUILTIN_PSRADI128,
28451 IX86_BUILTIN_PSRLDQI128,
28452 IX86_BUILTIN_PSRLWI128,
28453 IX86_BUILTIN_PSRLDI128,
28454 IX86_BUILTIN_PSRLQI128,
28456 IX86_BUILTIN_PSLLDQ128,
28457 IX86_BUILTIN_PSLLW128,
28458 IX86_BUILTIN_PSLLD128,
28459 IX86_BUILTIN_PSLLQ128,
28460 IX86_BUILTIN_PSRAW128,
28461 IX86_BUILTIN_PSRAD128,
28462 IX86_BUILTIN_PSRLW128,
28463 IX86_BUILTIN_PSRLD128,
28464 IX86_BUILTIN_PSRLQ128,
28466 IX86_BUILTIN_PUNPCKHBW128,
28467 IX86_BUILTIN_PUNPCKHWD128,
28468 IX86_BUILTIN_PUNPCKHDQ128,
28469 IX86_BUILTIN_PUNPCKHQDQ128,
28470 IX86_BUILTIN_PUNPCKLBW128,
28471 IX86_BUILTIN_PUNPCKLWD128,
28472 IX86_BUILTIN_PUNPCKLDQ128,
28473 IX86_BUILTIN_PUNPCKLQDQ128,
28475 IX86_BUILTIN_CLFLUSH,
28476 IX86_BUILTIN_MFENCE,
28477 IX86_BUILTIN_LFENCE,
28478 IX86_BUILTIN_PAUSE,
28480 IX86_BUILTIN_FNSTENV,
28481 IX86_BUILTIN_FLDENV,
28482 IX86_BUILTIN_FNSTSW,
28483 IX86_BUILTIN_FNCLEX,
28485 IX86_BUILTIN_BSRSI,
28486 IX86_BUILTIN_BSRDI,
28487 IX86_BUILTIN_RDPMC,
28488 IX86_BUILTIN_RDTSC,
28489 IX86_BUILTIN_RDTSCP,
28490 IX86_BUILTIN_ROLQI,
28491 IX86_BUILTIN_ROLHI,
28492 IX86_BUILTIN_RORQI,
28493 IX86_BUILTIN_RORHI,
28495 /* SSE3. */
28496 IX86_BUILTIN_ADDSUBPS,
28497 IX86_BUILTIN_HADDPS,
28498 IX86_BUILTIN_HSUBPS,
28499 IX86_BUILTIN_MOVSHDUP,
28500 IX86_BUILTIN_MOVSLDUP,
28501 IX86_BUILTIN_ADDSUBPD,
28502 IX86_BUILTIN_HADDPD,
28503 IX86_BUILTIN_HSUBPD,
28504 IX86_BUILTIN_LDDQU,
28506 IX86_BUILTIN_MONITOR,
28507 IX86_BUILTIN_MWAIT,
28509 /* SSSE3. */
28510 IX86_BUILTIN_PHADDW,
28511 IX86_BUILTIN_PHADDD,
28512 IX86_BUILTIN_PHADDSW,
28513 IX86_BUILTIN_PHSUBW,
28514 IX86_BUILTIN_PHSUBD,
28515 IX86_BUILTIN_PHSUBSW,
28516 IX86_BUILTIN_PMADDUBSW,
28517 IX86_BUILTIN_PMULHRSW,
28518 IX86_BUILTIN_PSHUFB,
28519 IX86_BUILTIN_PSIGNB,
28520 IX86_BUILTIN_PSIGNW,
28521 IX86_BUILTIN_PSIGND,
28522 IX86_BUILTIN_PALIGNR,
28523 IX86_BUILTIN_PABSB,
28524 IX86_BUILTIN_PABSW,
28525 IX86_BUILTIN_PABSD,
28527 IX86_BUILTIN_PHADDW128,
28528 IX86_BUILTIN_PHADDD128,
28529 IX86_BUILTIN_PHADDSW128,
28530 IX86_BUILTIN_PHSUBW128,
28531 IX86_BUILTIN_PHSUBD128,
28532 IX86_BUILTIN_PHSUBSW128,
28533 IX86_BUILTIN_PMADDUBSW128,
28534 IX86_BUILTIN_PMULHRSW128,
28535 IX86_BUILTIN_PSHUFB128,
28536 IX86_BUILTIN_PSIGNB128,
28537 IX86_BUILTIN_PSIGNW128,
28538 IX86_BUILTIN_PSIGND128,
28539 IX86_BUILTIN_PALIGNR128,
28540 IX86_BUILTIN_PABSB128,
28541 IX86_BUILTIN_PABSW128,
28542 IX86_BUILTIN_PABSD128,
28544 /* AMDFAM10 - SSE4A New Instructions. */
28545 IX86_BUILTIN_MOVNTSD,
28546 IX86_BUILTIN_MOVNTSS,
28547 IX86_BUILTIN_EXTRQI,
28548 IX86_BUILTIN_EXTRQ,
28549 IX86_BUILTIN_INSERTQI,
28550 IX86_BUILTIN_INSERTQ,
28552 /* SSE4.1. */
28553 IX86_BUILTIN_BLENDPD,
28554 IX86_BUILTIN_BLENDPS,
28555 IX86_BUILTIN_BLENDVPD,
28556 IX86_BUILTIN_BLENDVPS,
28557 IX86_BUILTIN_PBLENDVB128,
28558 IX86_BUILTIN_PBLENDW128,
28560 IX86_BUILTIN_DPPD,
28561 IX86_BUILTIN_DPPS,
28563 IX86_BUILTIN_INSERTPS128,
28565 IX86_BUILTIN_MOVNTDQA,
28566 IX86_BUILTIN_MPSADBW128,
28567 IX86_BUILTIN_PACKUSDW128,
28568 IX86_BUILTIN_PCMPEQQ,
28569 IX86_BUILTIN_PHMINPOSUW128,
28571 IX86_BUILTIN_PMAXSB128,
28572 IX86_BUILTIN_PMAXSD128,
28573 IX86_BUILTIN_PMAXUD128,
28574 IX86_BUILTIN_PMAXUW128,
28576 IX86_BUILTIN_PMINSB128,
28577 IX86_BUILTIN_PMINSD128,
28578 IX86_BUILTIN_PMINUD128,
28579 IX86_BUILTIN_PMINUW128,
28581 IX86_BUILTIN_PMOVSXBW128,
28582 IX86_BUILTIN_PMOVSXBD128,
28583 IX86_BUILTIN_PMOVSXBQ128,
28584 IX86_BUILTIN_PMOVSXWD128,
28585 IX86_BUILTIN_PMOVSXWQ128,
28586 IX86_BUILTIN_PMOVSXDQ128,
28588 IX86_BUILTIN_PMOVZXBW128,
28589 IX86_BUILTIN_PMOVZXBD128,
28590 IX86_BUILTIN_PMOVZXBQ128,
28591 IX86_BUILTIN_PMOVZXWD128,
28592 IX86_BUILTIN_PMOVZXWQ128,
28593 IX86_BUILTIN_PMOVZXDQ128,
28595 IX86_BUILTIN_PMULDQ128,
28596 IX86_BUILTIN_PMULLD128,
28598 IX86_BUILTIN_ROUNDSD,
28599 IX86_BUILTIN_ROUNDSS,
28601 IX86_BUILTIN_ROUNDPD,
28602 IX86_BUILTIN_ROUNDPS,
28604 IX86_BUILTIN_FLOORPD,
28605 IX86_BUILTIN_CEILPD,
28606 IX86_BUILTIN_TRUNCPD,
28607 IX86_BUILTIN_RINTPD,
28608 IX86_BUILTIN_ROUNDPD_AZ,
28610 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28611 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28612 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28614 IX86_BUILTIN_FLOORPS,
28615 IX86_BUILTIN_CEILPS,
28616 IX86_BUILTIN_TRUNCPS,
28617 IX86_BUILTIN_RINTPS,
28618 IX86_BUILTIN_ROUNDPS_AZ,
28620 IX86_BUILTIN_FLOORPS_SFIX,
28621 IX86_BUILTIN_CEILPS_SFIX,
28622 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28624 IX86_BUILTIN_PTESTZ,
28625 IX86_BUILTIN_PTESTC,
28626 IX86_BUILTIN_PTESTNZC,
28628 IX86_BUILTIN_VEC_INIT_V2SI,
28629 IX86_BUILTIN_VEC_INIT_V4HI,
28630 IX86_BUILTIN_VEC_INIT_V8QI,
28631 IX86_BUILTIN_VEC_EXT_V2DF,
28632 IX86_BUILTIN_VEC_EXT_V2DI,
28633 IX86_BUILTIN_VEC_EXT_V4SF,
28634 IX86_BUILTIN_VEC_EXT_V4SI,
28635 IX86_BUILTIN_VEC_EXT_V8HI,
28636 IX86_BUILTIN_VEC_EXT_V2SI,
28637 IX86_BUILTIN_VEC_EXT_V4HI,
28638 IX86_BUILTIN_VEC_EXT_V16QI,
28639 IX86_BUILTIN_VEC_SET_V2DI,
28640 IX86_BUILTIN_VEC_SET_V4SF,
28641 IX86_BUILTIN_VEC_SET_V4SI,
28642 IX86_BUILTIN_VEC_SET_V8HI,
28643 IX86_BUILTIN_VEC_SET_V4HI,
28644 IX86_BUILTIN_VEC_SET_V16QI,
28646 IX86_BUILTIN_VEC_PACK_SFIX,
28647 IX86_BUILTIN_VEC_PACK_SFIX256,
28649 /* SSE4.2. */
28650 IX86_BUILTIN_CRC32QI,
28651 IX86_BUILTIN_CRC32HI,
28652 IX86_BUILTIN_CRC32SI,
28653 IX86_BUILTIN_CRC32DI,
28655 IX86_BUILTIN_PCMPESTRI128,
28656 IX86_BUILTIN_PCMPESTRM128,
28657 IX86_BUILTIN_PCMPESTRA128,
28658 IX86_BUILTIN_PCMPESTRC128,
28659 IX86_BUILTIN_PCMPESTRO128,
28660 IX86_BUILTIN_PCMPESTRS128,
28661 IX86_BUILTIN_PCMPESTRZ128,
28662 IX86_BUILTIN_PCMPISTRI128,
28663 IX86_BUILTIN_PCMPISTRM128,
28664 IX86_BUILTIN_PCMPISTRA128,
28665 IX86_BUILTIN_PCMPISTRC128,
28666 IX86_BUILTIN_PCMPISTRO128,
28667 IX86_BUILTIN_PCMPISTRS128,
28668 IX86_BUILTIN_PCMPISTRZ128,
28670 IX86_BUILTIN_PCMPGTQ,
28672 /* AES instructions */
28673 IX86_BUILTIN_AESENC128,
28674 IX86_BUILTIN_AESENCLAST128,
28675 IX86_BUILTIN_AESDEC128,
28676 IX86_BUILTIN_AESDECLAST128,
28677 IX86_BUILTIN_AESIMC128,
28678 IX86_BUILTIN_AESKEYGENASSIST128,
28680 /* PCLMUL instruction */
28681 IX86_BUILTIN_PCLMULQDQ128,
28683 /* AVX */
28684 IX86_BUILTIN_ADDPD256,
28685 IX86_BUILTIN_ADDPS256,
28686 IX86_BUILTIN_ADDSUBPD256,
28687 IX86_BUILTIN_ADDSUBPS256,
28688 IX86_BUILTIN_ANDPD256,
28689 IX86_BUILTIN_ANDPS256,
28690 IX86_BUILTIN_ANDNPD256,
28691 IX86_BUILTIN_ANDNPS256,
28692 IX86_BUILTIN_BLENDPD256,
28693 IX86_BUILTIN_BLENDPS256,
28694 IX86_BUILTIN_BLENDVPD256,
28695 IX86_BUILTIN_BLENDVPS256,
28696 IX86_BUILTIN_DIVPD256,
28697 IX86_BUILTIN_DIVPS256,
28698 IX86_BUILTIN_DPPS256,
28699 IX86_BUILTIN_HADDPD256,
28700 IX86_BUILTIN_HADDPS256,
28701 IX86_BUILTIN_HSUBPD256,
28702 IX86_BUILTIN_HSUBPS256,
28703 IX86_BUILTIN_MAXPD256,
28704 IX86_BUILTIN_MAXPS256,
28705 IX86_BUILTIN_MINPD256,
28706 IX86_BUILTIN_MINPS256,
28707 IX86_BUILTIN_MULPD256,
28708 IX86_BUILTIN_MULPS256,
28709 IX86_BUILTIN_ORPD256,
28710 IX86_BUILTIN_ORPS256,
28711 IX86_BUILTIN_SHUFPD256,
28712 IX86_BUILTIN_SHUFPS256,
28713 IX86_BUILTIN_SUBPD256,
28714 IX86_BUILTIN_SUBPS256,
28715 IX86_BUILTIN_XORPD256,
28716 IX86_BUILTIN_XORPS256,
28717 IX86_BUILTIN_CMPSD,
28718 IX86_BUILTIN_CMPSS,
28719 IX86_BUILTIN_CMPPD,
28720 IX86_BUILTIN_CMPPS,
28721 IX86_BUILTIN_CMPPD256,
28722 IX86_BUILTIN_CMPPS256,
28723 IX86_BUILTIN_CVTDQ2PD256,
28724 IX86_BUILTIN_CVTDQ2PS256,
28725 IX86_BUILTIN_CVTPD2PS256,
28726 IX86_BUILTIN_CVTPS2DQ256,
28727 IX86_BUILTIN_CVTPS2PD256,
28728 IX86_BUILTIN_CVTTPD2DQ256,
28729 IX86_BUILTIN_CVTPD2DQ256,
28730 IX86_BUILTIN_CVTTPS2DQ256,
28731 IX86_BUILTIN_EXTRACTF128PD256,
28732 IX86_BUILTIN_EXTRACTF128PS256,
28733 IX86_BUILTIN_EXTRACTF128SI256,
28734 IX86_BUILTIN_VZEROALL,
28735 IX86_BUILTIN_VZEROUPPER,
28736 IX86_BUILTIN_VPERMILVARPD,
28737 IX86_BUILTIN_VPERMILVARPS,
28738 IX86_BUILTIN_VPERMILVARPD256,
28739 IX86_BUILTIN_VPERMILVARPS256,
28740 IX86_BUILTIN_VPERMILPD,
28741 IX86_BUILTIN_VPERMILPS,
28742 IX86_BUILTIN_VPERMILPD256,
28743 IX86_BUILTIN_VPERMILPS256,
28744 IX86_BUILTIN_VPERMIL2PD,
28745 IX86_BUILTIN_VPERMIL2PS,
28746 IX86_BUILTIN_VPERMIL2PD256,
28747 IX86_BUILTIN_VPERMIL2PS256,
28748 IX86_BUILTIN_VPERM2F128PD256,
28749 IX86_BUILTIN_VPERM2F128PS256,
28750 IX86_BUILTIN_VPERM2F128SI256,
28751 IX86_BUILTIN_VBROADCASTSS,
28752 IX86_BUILTIN_VBROADCASTSD256,
28753 IX86_BUILTIN_VBROADCASTSS256,
28754 IX86_BUILTIN_VBROADCASTPD256,
28755 IX86_BUILTIN_VBROADCASTPS256,
28756 IX86_BUILTIN_VINSERTF128PD256,
28757 IX86_BUILTIN_VINSERTF128PS256,
28758 IX86_BUILTIN_VINSERTF128SI256,
28759 IX86_BUILTIN_LOADUPD256,
28760 IX86_BUILTIN_LOADUPS256,
28761 IX86_BUILTIN_STOREUPD256,
28762 IX86_BUILTIN_STOREUPS256,
28763 IX86_BUILTIN_LDDQU256,
28764 IX86_BUILTIN_MOVNTDQ256,
28765 IX86_BUILTIN_MOVNTPD256,
28766 IX86_BUILTIN_MOVNTPS256,
28767 IX86_BUILTIN_LOADDQU256,
28768 IX86_BUILTIN_STOREDQU256,
28769 IX86_BUILTIN_MASKLOADPD,
28770 IX86_BUILTIN_MASKLOADPS,
28771 IX86_BUILTIN_MASKSTOREPD,
28772 IX86_BUILTIN_MASKSTOREPS,
28773 IX86_BUILTIN_MASKLOADPD256,
28774 IX86_BUILTIN_MASKLOADPS256,
28775 IX86_BUILTIN_MASKSTOREPD256,
28776 IX86_BUILTIN_MASKSTOREPS256,
28777 IX86_BUILTIN_MOVSHDUP256,
28778 IX86_BUILTIN_MOVSLDUP256,
28779 IX86_BUILTIN_MOVDDUP256,
28781 IX86_BUILTIN_SQRTPD256,
28782 IX86_BUILTIN_SQRTPS256,
28783 IX86_BUILTIN_SQRTPS_NR256,
28784 IX86_BUILTIN_RSQRTPS256,
28785 IX86_BUILTIN_RSQRTPS_NR256,
28787 IX86_BUILTIN_RCPPS256,
28789 IX86_BUILTIN_ROUNDPD256,
28790 IX86_BUILTIN_ROUNDPS256,
28792 IX86_BUILTIN_FLOORPD256,
28793 IX86_BUILTIN_CEILPD256,
28794 IX86_BUILTIN_TRUNCPD256,
28795 IX86_BUILTIN_RINTPD256,
28796 IX86_BUILTIN_ROUNDPD_AZ256,
28798 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28799 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28800 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28802 IX86_BUILTIN_FLOORPS256,
28803 IX86_BUILTIN_CEILPS256,
28804 IX86_BUILTIN_TRUNCPS256,
28805 IX86_BUILTIN_RINTPS256,
28806 IX86_BUILTIN_ROUNDPS_AZ256,
28808 IX86_BUILTIN_FLOORPS_SFIX256,
28809 IX86_BUILTIN_CEILPS_SFIX256,
28810 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28812 IX86_BUILTIN_UNPCKHPD256,
28813 IX86_BUILTIN_UNPCKLPD256,
28814 IX86_BUILTIN_UNPCKHPS256,
28815 IX86_BUILTIN_UNPCKLPS256,
28817 IX86_BUILTIN_SI256_SI,
28818 IX86_BUILTIN_PS256_PS,
28819 IX86_BUILTIN_PD256_PD,
28820 IX86_BUILTIN_SI_SI256,
28821 IX86_BUILTIN_PS_PS256,
28822 IX86_BUILTIN_PD_PD256,
28824 IX86_BUILTIN_VTESTZPD,
28825 IX86_BUILTIN_VTESTCPD,
28826 IX86_BUILTIN_VTESTNZCPD,
28827 IX86_BUILTIN_VTESTZPS,
28828 IX86_BUILTIN_VTESTCPS,
28829 IX86_BUILTIN_VTESTNZCPS,
28830 IX86_BUILTIN_VTESTZPD256,
28831 IX86_BUILTIN_VTESTCPD256,
28832 IX86_BUILTIN_VTESTNZCPD256,
28833 IX86_BUILTIN_VTESTZPS256,
28834 IX86_BUILTIN_VTESTCPS256,
28835 IX86_BUILTIN_VTESTNZCPS256,
28836 IX86_BUILTIN_PTESTZ256,
28837 IX86_BUILTIN_PTESTC256,
28838 IX86_BUILTIN_PTESTNZC256,
28840 IX86_BUILTIN_MOVMSKPD256,
28841 IX86_BUILTIN_MOVMSKPS256,
28843 /* AVX2 */
28844 IX86_BUILTIN_MPSADBW256,
28845 IX86_BUILTIN_PABSB256,
28846 IX86_BUILTIN_PABSW256,
28847 IX86_BUILTIN_PABSD256,
28848 IX86_BUILTIN_PACKSSDW256,
28849 IX86_BUILTIN_PACKSSWB256,
28850 IX86_BUILTIN_PACKUSDW256,
28851 IX86_BUILTIN_PACKUSWB256,
28852 IX86_BUILTIN_PADDB256,
28853 IX86_BUILTIN_PADDW256,
28854 IX86_BUILTIN_PADDD256,
28855 IX86_BUILTIN_PADDQ256,
28856 IX86_BUILTIN_PADDSB256,
28857 IX86_BUILTIN_PADDSW256,
28858 IX86_BUILTIN_PADDUSB256,
28859 IX86_BUILTIN_PADDUSW256,
28860 IX86_BUILTIN_PALIGNR256,
28861 IX86_BUILTIN_AND256I,
28862 IX86_BUILTIN_ANDNOT256I,
28863 IX86_BUILTIN_PAVGB256,
28864 IX86_BUILTIN_PAVGW256,
28865 IX86_BUILTIN_PBLENDVB256,
28866 IX86_BUILTIN_PBLENDVW256,
28867 IX86_BUILTIN_PCMPEQB256,
28868 IX86_BUILTIN_PCMPEQW256,
28869 IX86_BUILTIN_PCMPEQD256,
28870 IX86_BUILTIN_PCMPEQQ256,
28871 IX86_BUILTIN_PCMPGTB256,
28872 IX86_BUILTIN_PCMPGTW256,
28873 IX86_BUILTIN_PCMPGTD256,
28874 IX86_BUILTIN_PCMPGTQ256,
28875 IX86_BUILTIN_PHADDW256,
28876 IX86_BUILTIN_PHADDD256,
28877 IX86_BUILTIN_PHADDSW256,
28878 IX86_BUILTIN_PHSUBW256,
28879 IX86_BUILTIN_PHSUBD256,
28880 IX86_BUILTIN_PHSUBSW256,
28881 IX86_BUILTIN_PMADDUBSW256,
28882 IX86_BUILTIN_PMADDWD256,
28883 IX86_BUILTIN_PMAXSB256,
28884 IX86_BUILTIN_PMAXSW256,
28885 IX86_BUILTIN_PMAXSD256,
28886 IX86_BUILTIN_PMAXUB256,
28887 IX86_BUILTIN_PMAXUW256,
28888 IX86_BUILTIN_PMAXUD256,
28889 IX86_BUILTIN_PMINSB256,
28890 IX86_BUILTIN_PMINSW256,
28891 IX86_BUILTIN_PMINSD256,
28892 IX86_BUILTIN_PMINUB256,
28893 IX86_BUILTIN_PMINUW256,
28894 IX86_BUILTIN_PMINUD256,
28895 IX86_BUILTIN_PMOVMSKB256,
28896 IX86_BUILTIN_PMOVSXBW256,
28897 IX86_BUILTIN_PMOVSXBD256,
28898 IX86_BUILTIN_PMOVSXBQ256,
28899 IX86_BUILTIN_PMOVSXWD256,
28900 IX86_BUILTIN_PMOVSXWQ256,
28901 IX86_BUILTIN_PMOVSXDQ256,
28902 IX86_BUILTIN_PMOVZXBW256,
28903 IX86_BUILTIN_PMOVZXBD256,
28904 IX86_BUILTIN_PMOVZXBQ256,
28905 IX86_BUILTIN_PMOVZXWD256,
28906 IX86_BUILTIN_PMOVZXWQ256,
28907 IX86_BUILTIN_PMOVZXDQ256,
28908 IX86_BUILTIN_PMULDQ256,
28909 IX86_BUILTIN_PMULHRSW256,
28910 IX86_BUILTIN_PMULHUW256,
28911 IX86_BUILTIN_PMULHW256,
28912 IX86_BUILTIN_PMULLW256,
28913 IX86_BUILTIN_PMULLD256,
28914 IX86_BUILTIN_PMULUDQ256,
28915 IX86_BUILTIN_POR256,
28916 IX86_BUILTIN_PSADBW256,
28917 IX86_BUILTIN_PSHUFB256,
28918 IX86_BUILTIN_PSHUFD256,
28919 IX86_BUILTIN_PSHUFHW256,
28920 IX86_BUILTIN_PSHUFLW256,
28921 IX86_BUILTIN_PSIGNB256,
28922 IX86_BUILTIN_PSIGNW256,
28923 IX86_BUILTIN_PSIGND256,
28924 IX86_BUILTIN_PSLLDQI256,
28925 IX86_BUILTIN_PSLLWI256,
28926 IX86_BUILTIN_PSLLW256,
28927 IX86_BUILTIN_PSLLDI256,
28928 IX86_BUILTIN_PSLLD256,
28929 IX86_BUILTIN_PSLLQI256,
28930 IX86_BUILTIN_PSLLQ256,
28931 IX86_BUILTIN_PSRAWI256,
28932 IX86_BUILTIN_PSRAW256,
28933 IX86_BUILTIN_PSRADI256,
28934 IX86_BUILTIN_PSRAD256,
28935 IX86_BUILTIN_PSRLDQI256,
28936 IX86_BUILTIN_PSRLWI256,
28937 IX86_BUILTIN_PSRLW256,
28938 IX86_BUILTIN_PSRLDI256,
28939 IX86_BUILTIN_PSRLD256,
28940 IX86_BUILTIN_PSRLQI256,
28941 IX86_BUILTIN_PSRLQ256,
28942 IX86_BUILTIN_PSUBB256,
28943 IX86_BUILTIN_PSUBW256,
28944 IX86_BUILTIN_PSUBD256,
28945 IX86_BUILTIN_PSUBQ256,
28946 IX86_BUILTIN_PSUBSB256,
28947 IX86_BUILTIN_PSUBSW256,
28948 IX86_BUILTIN_PSUBUSB256,
28949 IX86_BUILTIN_PSUBUSW256,
28950 IX86_BUILTIN_PUNPCKHBW256,
28951 IX86_BUILTIN_PUNPCKHWD256,
28952 IX86_BUILTIN_PUNPCKHDQ256,
28953 IX86_BUILTIN_PUNPCKHQDQ256,
28954 IX86_BUILTIN_PUNPCKLBW256,
28955 IX86_BUILTIN_PUNPCKLWD256,
28956 IX86_BUILTIN_PUNPCKLDQ256,
28957 IX86_BUILTIN_PUNPCKLQDQ256,
28958 IX86_BUILTIN_PXOR256,
28959 IX86_BUILTIN_MOVNTDQA256,
28960 IX86_BUILTIN_VBROADCASTSS_PS,
28961 IX86_BUILTIN_VBROADCASTSS_PS256,
28962 IX86_BUILTIN_VBROADCASTSD_PD256,
28963 IX86_BUILTIN_VBROADCASTSI256,
28964 IX86_BUILTIN_PBLENDD256,
28965 IX86_BUILTIN_PBLENDD128,
28966 IX86_BUILTIN_PBROADCASTB256,
28967 IX86_BUILTIN_PBROADCASTW256,
28968 IX86_BUILTIN_PBROADCASTD256,
28969 IX86_BUILTIN_PBROADCASTQ256,
28970 IX86_BUILTIN_PBROADCASTB128,
28971 IX86_BUILTIN_PBROADCASTW128,
28972 IX86_BUILTIN_PBROADCASTD128,
28973 IX86_BUILTIN_PBROADCASTQ128,
28974 IX86_BUILTIN_VPERMVARSI256,
28975 IX86_BUILTIN_VPERMDF256,
28976 IX86_BUILTIN_VPERMVARSF256,
28977 IX86_BUILTIN_VPERMDI256,
28978 IX86_BUILTIN_VPERMTI256,
28979 IX86_BUILTIN_VEXTRACT128I256,
28980 IX86_BUILTIN_VINSERT128I256,
28981 IX86_BUILTIN_MASKLOADD,
28982 IX86_BUILTIN_MASKLOADQ,
28983 IX86_BUILTIN_MASKLOADD256,
28984 IX86_BUILTIN_MASKLOADQ256,
28985 IX86_BUILTIN_MASKSTORED,
28986 IX86_BUILTIN_MASKSTOREQ,
28987 IX86_BUILTIN_MASKSTORED256,
28988 IX86_BUILTIN_MASKSTOREQ256,
28989 IX86_BUILTIN_PSLLVV4DI,
28990 IX86_BUILTIN_PSLLVV2DI,
28991 IX86_BUILTIN_PSLLVV8SI,
28992 IX86_BUILTIN_PSLLVV4SI,
28993 IX86_BUILTIN_PSRAVV8SI,
28994 IX86_BUILTIN_PSRAVV4SI,
28995 IX86_BUILTIN_PSRLVV4DI,
28996 IX86_BUILTIN_PSRLVV2DI,
28997 IX86_BUILTIN_PSRLVV8SI,
28998 IX86_BUILTIN_PSRLVV4SI,
29000 IX86_BUILTIN_GATHERSIV2DF,
29001 IX86_BUILTIN_GATHERSIV4DF,
29002 IX86_BUILTIN_GATHERDIV2DF,
29003 IX86_BUILTIN_GATHERDIV4DF,
29004 IX86_BUILTIN_GATHERSIV4SF,
29005 IX86_BUILTIN_GATHERSIV8SF,
29006 IX86_BUILTIN_GATHERDIV4SF,
29007 IX86_BUILTIN_GATHERDIV8SF,
29008 IX86_BUILTIN_GATHERSIV2DI,
29009 IX86_BUILTIN_GATHERSIV4DI,
29010 IX86_BUILTIN_GATHERDIV2DI,
29011 IX86_BUILTIN_GATHERDIV4DI,
29012 IX86_BUILTIN_GATHERSIV4SI,
29013 IX86_BUILTIN_GATHERSIV8SI,
29014 IX86_BUILTIN_GATHERDIV4SI,
29015 IX86_BUILTIN_GATHERDIV8SI,
29017 /* AVX512F */
29018 IX86_BUILTIN_SI512_SI256,
29019 IX86_BUILTIN_PD512_PD256,
29020 IX86_BUILTIN_PS512_PS256,
29021 IX86_BUILTIN_SI512_SI,
29022 IX86_BUILTIN_PD512_PD,
29023 IX86_BUILTIN_PS512_PS,
29024 IX86_BUILTIN_ADDPD512,
29025 IX86_BUILTIN_ADDPS512,
29026 IX86_BUILTIN_ADDSD_ROUND,
29027 IX86_BUILTIN_ADDSS_ROUND,
29028 IX86_BUILTIN_ALIGND512,
29029 IX86_BUILTIN_ALIGNQ512,
29030 IX86_BUILTIN_BLENDMD512,
29031 IX86_BUILTIN_BLENDMPD512,
29032 IX86_BUILTIN_BLENDMPS512,
29033 IX86_BUILTIN_BLENDMQ512,
29034 IX86_BUILTIN_BROADCASTF32X4_512,
29035 IX86_BUILTIN_BROADCASTF64X4_512,
29036 IX86_BUILTIN_BROADCASTI32X4_512,
29037 IX86_BUILTIN_BROADCASTI64X4_512,
29038 IX86_BUILTIN_BROADCASTSD512,
29039 IX86_BUILTIN_BROADCASTSS512,
29040 IX86_BUILTIN_CMPD512,
29041 IX86_BUILTIN_CMPPD512,
29042 IX86_BUILTIN_CMPPS512,
29043 IX86_BUILTIN_CMPQ512,
29044 IX86_BUILTIN_CMPSD_MASK,
29045 IX86_BUILTIN_CMPSS_MASK,
29046 IX86_BUILTIN_COMIDF,
29047 IX86_BUILTIN_COMISF,
29048 IX86_BUILTIN_COMPRESSPD512,
29049 IX86_BUILTIN_COMPRESSPDSTORE512,
29050 IX86_BUILTIN_COMPRESSPS512,
29051 IX86_BUILTIN_COMPRESSPSSTORE512,
29052 IX86_BUILTIN_CVTDQ2PD512,
29053 IX86_BUILTIN_CVTDQ2PS512,
29054 IX86_BUILTIN_CVTPD2DQ512,
29055 IX86_BUILTIN_CVTPD2PS512,
29056 IX86_BUILTIN_CVTPD2UDQ512,
29057 IX86_BUILTIN_CVTPH2PS512,
29058 IX86_BUILTIN_CVTPS2DQ512,
29059 IX86_BUILTIN_CVTPS2PD512,
29060 IX86_BUILTIN_CVTPS2PH512,
29061 IX86_BUILTIN_CVTPS2UDQ512,
29062 IX86_BUILTIN_CVTSD2SS_ROUND,
29063 IX86_BUILTIN_CVTSI2SD64,
29064 IX86_BUILTIN_CVTSI2SS32,
29065 IX86_BUILTIN_CVTSI2SS64,
29066 IX86_BUILTIN_CVTSS2SD_ROUND,
29067 IX86_BUILTIN_CVTTPD2DQ512,
29068 IX86_BUILTIN_CVTTPD2UDQ512,
29069 IX86_BUILTIN_CVTTPS2DQ512,
29070 IX86_BUILTIN_CVTTPS2UDQ512,
29071 IX86_BUILTIN_CVTUDQ2PD512,
29072 IX86_BUILTIN_CVTUDQ2PS512,
29073 IX86_BUILTIN_CVTUSI2SD32,
29074 IX86_BUILTIN_CVTUSI2SD64,
29075 IX86_BUILTIN_CVTUSI2SS32,
29076 IX86_BUILTIN_CVTUSI2SS64,
29077 IX86_BUILTIN_DIVPD512,
29078 IX86_BUILTIN_DIVPS512,
29079 IX86_BUILTIN_DIVSD_ROUND,
29080 IX86_BUILTIN_DIVSS_ROUND,
29081 IX86_BUILTIN_EXPANDPD512,
29082 IX86_BUILTIN_EXPANDPD512Z,
29083 IX86_BUILTIN_EXPANDPDLOAD512,
29084 IX86_BUILTIN_EXPANDPDLOAD512Z,
29085 IX86_BUILTIN_EXPANDPS512,
29086 IX86_BUILTIN_EXPANDPS512Z,
29087 IX86_BUILTIN_EXPANDPSLOAD512,
29088 IX86_BUILTIN_EXPANDPSLOAD512Z,
29089 IX86_BUILTIN_EXTRACTF32X4,
29090 IX86_BUILTIN_EXTRACTF64X4,
29091 IX86_BUILTIN_EXTRACTI32X4,
29092 IX86_BUILTIN_EXTRACTI64X4,
29093 IX86_BUILTIN_FIXUPIMMPD512_MASK,
29094 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
29095 IX86_BUILTIN_FIXUPIMMPS512_MASK,
29096 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
29097 IX86_BUILTIN_FIXUPIMMSD128_MASK,
29098 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
29099 IX86_BUILTIN_FIXUPIMMSS128_MASK,
29100 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
29101 IX86_BUILTIN_GETEXPPD512,
29102 IX86_BUILTIN_GETEXPPS512,
29103 IX86_BUILTIN_GETEXPSD128,
29104 IX86_BUILTIN_GETEXPSS128,
29105 IX86_BUILTIN_GETMANTPD512,
29106 IX86_BUILTIN_GETMANTPS512,
29107 IX86_BUILTIN_GETMANTSD128,
29108 IX86_BUILTIN_GETMANTSS128,
29109 IX86_BUILTIN_INSERTF32X4,
29110 IX86_BUILTIN_INSERTF64X4,
29111 IX86_BUILTIN_INSERTI32X4,
29112 IX86_BUILTIN_INSERTI64X4,
29113 IX86_BUILTIN_LOADAPD512,
29114 IX86_BUILTIN_LOADAPS512,
29115 IX86_BUILTIN_LOADDQUDI512,
29116 IX86_BUILTIN_LOADDQUSI512,
29117 IX86_BUILTIN_LOADUPD512,
29118 IX86_BUILTIN_LOADUPS512,
29119 IX86_BUILTIN_MAXPD512,
29120 IX86_BUILTIN_MAXPS512,
29121 IX86_BUILTIN_MAXSD_ROUND,
29122 IX86_BUILTIN_MAXSS_ROUND,
29123 IX86_BUILTIN_MINPD512,
29124 IX86_BUILTIN_MINPS512,
29125 IX86_BUILTIN_MINSD_ROUND,
29126 IX86_BUILTIN_MINSS_ROUND,
29127 IX86_BUILTIN_MOVAPD512,
29128 IX86_BUILTIN_MOVAPS512,
29129 IX86_BUILTIN_MOVDDUP512,
29130 IX86_BUILTIN_MOVDQA32LOAD512,
29131 IX86_BUILTIN_MOVDQA32STORE512,
29132 IX86_BUILTIN_MOVDQA32_512,
29133 IX86_BUILTIN_MOVDQA64LOAD512,
29134 IX86_BUILTIN_MOVDQA64STORE512,
29135 IX86_BUILTIN_MOVDQA64_512,
29136 IX86_BUILTIN_MOVNTDQ512,
29137 IX86_BUILTIN_MOVNTDQA512,
29138 IX86_BUILTIN_MOVNTPD512,
29139 IX86_BUILTIN_MOVNTPS512,
29140 IX86_BUILTIN_MOVSHDUP512,
29141 IX86_BUILTIN_MOVSLDUP512,
29142 IX86_BUILTIN_MULPD512,
29143 IX86_BUILTIN_MULPS512,
29144 IX86_BUILTIN_MULSD_ROUND,
29145 IX86_BUILTIN_MULSS_ROUND,
29146 IX86_BUILTIN_PABSD512,
29147 IX86_BUILTIN_PABSQ512,
29148 IX86_BUILTIN_PADDD512,
29149 IX86_BUILTIN_PADDQ512,
29150 IX86_BUILTIN_PANDD512,
29151 IX86_BUILTIN_PANDND512,
29152 IX86_BUILTIN_PANDNQ512,
29153 IX86_BUILTIN_PANDQ512,
29154 IX86_BUILTIN_PBROADCASTD512,
29155 IX86_BUILTIN_PBROADCASTD512_GPR,
29156 IX86_BUILTIN_PBROADCASTMB512,
29157 IX86_BUILTIN_PBROADCASTMW512,
29158 IX86_BUILTIN_PBROADCASTQ512,
29159 IX86_BUILTIN_PBROADCASTQ512_GPR,
29160 IX86_BUILTIN_PCMPEQD512_MASK,
29161 IX86_BUILTIN_PCMPEQQ512_MASK,
29162 IX86_BUILTIN_PCMPGTD512_MASK,
29163 IX86_BUILTIN_PCMPGTQ512_MASK,
29164 IX86_BUILTIN_PCOMPRESSD512,
29165 IX86_BUILTIN_PCOMPRESSDSTORE512,
29166 IX86_BUILTIN_PCOMPRESSQ512,
29167 IX86_BUILTIN_PCOMPRESSQSTORE512,
29168 IX86_BUILTIN_PEXPANDD512,
29169 IX86_BUILTIN_PEXPANDD512Z,
29170 IX86_BUILTIN_PEXPANDDLOAD512,
29171 IX86_BUILTIN_PEXPANDDLOAD512Z,
29172 IX86_BUILTIN_PEXPANDQ512,
29173 IX86_BUILTIN_PEXPANDQ512Z,
29174 IX86_BUILTIN_PEXPANDQLOAD512,
29175 IX86_BUILTIN_PEXPANDQLOAD512Z,
29176 IX86_BUILTIN_PMAXSD512,
29177 IX86_BUILTIN_PMAXSQ512,
29178 IX86_BUILTIN_PMAXUD512,
29179 IX86_BUILTIN_PMAXUQ512,
29180 IX86_BUILTIN_PMINSD512,
29181 IX86_BUILTIN_PMINSQ512,
29182 IX86_BUILTIN_PMINUD512,
29183 IX86_BUILTIN_PMINUQ512,
29184 IX86_BUILTIN_PMOVDB512,
29185 IX86_BUILTIN_PMOVDB512_MEM,
29186 IX86_BUILTIN_PMOVDW512,
29187 IX86_BUILTIN_PMOVDW512_MEM,
29188 IX86_BUILTIN_PMOVQB512,
29189 IX86_BUILTIN_PMOVQB512_MEM,
29190 IX86_BUILTIN_PMOVQD512,
29191 IX86_BUILTIN_PMOVQD512_MEM,
29192 IX86_BUILTIN_PMOVQW512,
29193 IX86_BUILTIN_PMOVQW512_MEM,
29194 IX86_BUILTIN_PMOVSDB512,
29195 IX86_BUILTIN_PMOVSDB512_MEM,
29196 IX86_BUILTIN_PMOVSDW512,
29197 IX86_BUILTIN_PMOVSDW512_MEM,
29198 IX86_BUILTIN_PMOVSQB512,
29199 IX86_BUILTIN_PMOVSQB512_MEM,
29200 IX86_BUILTIN_PMOVSQD512,
29201 IX86_BUILTIN_PMOVSQD512_MEM,
29202 IX86_BUILTIN_PMOVSQW512,
29203 IX86_BUILTIN_PMOVSQW512_MEM,
29204 IX86_BUILTIN_PMOVSXBD512,
29205 IX86_BUILTIN_PMOVSXBQ512,
29206 IX86_BUILTIN_PMOVSXDQ512,
29207 IX86_BUILTIN_PMOVSXWD512,
29208 IX86_BUILTIN_PMOVSXWQ512,
29209 IX86_BUILTIN_PMOVUSDB512,
29210 IX86_BUILTIN_PMOVUSDB512_MEM,
29211 IX86_BUILTIN_PMOVUSDW512,
29212 IX86_BUILTIN_PMOVUSDW512_MEM,
29213 IX86_BUILTIN_PMOVUSQB512,
29214 IX86_BUILTIN_PMOVUSQB512_MEM,
29215 IX86_BUILTIN_PMOVUSQD512,
29216 IX86_BUILTIN_PMOVUSQD512_MEM,
29217 IX86_BUILTIN_PMOVUSQW512,
29218 IX86_BUILTIN_PMOVUSQW512_MEM,
29219 IX86_BUILTIN_PMOVZXBD512,
29220 IX86_BUILTIN_PMOVZXBQ512,
29221 IX86_BUILTIN_PMOVZXDQ512,
29222 IX86_BUILTIN_PMOVZXWD512,
29223 IX86_BUILTIN_PMOVZXWQ512,
29224 IX86_BUILTIN_PMULDQ512,
29225 IX86_BUILTIN_PMULLD512,
29226 IX86_BUILTIN_PMULUDQ512,
29227 IX86_BUILTIN_PORD512,
29228 IX86_BUILTIN_PORQ512,
29229 IX86_BUILTIN_PROLD512,
29230 IX86_BUILTIN_PROLQ512,
29231 IX86_BUILTIN_PROLVD512,
29232 IX86_BUILTIN_PROLVQ512,
29233 IX86_BUILTIN_PRORD512,
29234 IX86_BUILTIN_PRORQ512,
29235 IX86_BUILTIN_PRORVD512,
29236 IX86_BUILTIN_PRORVQ512,
29237 IX86_BUILTIN_PSHUFD512,
29238 IX86_BUILTIN_PSLLD512,
29239 IX86_BUILTIN_PSLLDI512,
29240 IX86_BUILTIN_PSLLQ512,
29241 IX86_BUILTIN_PSLLQI512,
29242 IX86_BUILTIN_PSLLVV16SI,
29243 IX86_BUILTIN_PSLLVV8DI,
29244 IX86_BUILTIN_PSRAD512,
29245 IX86_BUILTIN_PSRADI512,
29246 IX86_BUILTIN_PSRAQ512,
29247 IX86_BUILTIN_PSRAQI512,
29248 IX86_BUILTIN_PSRAVV16SI,
29249 IX86_BUILTIN_PSRAVV8DI,
29250 IX86_BUILTIN_PSRLD512,
29251 IX86_BUILTIN_PSRLDI512,
29252 IX86_BUILTIN_PSRLQ512,
29253 IX86_BUILTIN_PSRLQI512,
29254 IX86_BUILTIN_PSRLVV16SI,
29255 IX86_BUILTIN_PSRLVV8DI,
29256 IX86_BUILTIN_PSUBD512,
29257 IX86_BUILTIN_PSUBQ512,
29258 IX86_BUILTIN_PTESTMD512,
29259 IX86_BUILTIN_PTESTMQ512,
29260 IX86_BUILTIN_PTESTNMD512,
29261 IX86_BUILTIN_PTESTNMQ512,
29262 IX86_BUILTIN_PUNPCKHDQ512,
29263 IX86_BUILTIN_PUNPCKHQDQ512,
29264 IX86_BUILTIN_PUNPCKLDQ512,
29265 IX86_BUILTIN_PUNPCKLQDQ512,
29266 IX86_BUILTIN_PXORD512,
29267 IX86_BUILTIN_PXORQ512,
29268 IX86_BUILTIN_RCP14PD512,
29269 IX86_BUILTIN_RCP14PS512,
29270 IX86_BUILTIN_RCP14SD,
29271 IX86_BUILTIN_RCP14SS,
29272 IX86_BUILTIN_RNDSCALEPD,
29273 IX86_BUILTIN_RNDSCALEPS,
29274 IX86_BUILTIN_RNDSCALESD,
29275 IX86_BUILTIN_RNDSCALESS,
29276 IX86_BUILTIN_RSQRT14PD512,
29277 IX86_BUILTIN_RSQRT14PS512,
29278 IX86_BUILTIN_RSQRT14SD,
29279 IX86_BUILTIN_RSQRT14SS,
29280 IX86_BUILTIN_SCALEFPD512,
29281 IX86_BUILTIN_SCALEFPS512,
29282 IX86_BUILTIN_SCALEFSD,
29283 IX86_BUILTIN_SCALEFSS,
29284 IX86_BUILTIN_SHUFPD512,
29285 IX86_BUILTIN_SHUFPS512,
29286 IX86_BUILTIN_SHUF_F32x4,
29287 IX86_BUILTIN_SHUF_F64x2,
29288 IX86_BUILTIN_SHUF_I32x4,
29289 IX86_BUILTIN_SHUF_I64x2,
29290 IX86_BUILTIN_SQRTPD512,
29291 IX86_BUILTIN_SQRTPD512_MASK,
29292 IX86_BUILTIN_SQRTPS512_MASK,
29293 IX86_BUILTIN_SQRTPS_NR512,
29294 IX86_BUILTIN_SQRTSD_ROUND,
29295 IX86_BUILTIN_SQRTSS_ROUND,
29296 IX86_BUILTIN_STOREAPD512,
29297 IX86_BUILTIN_STOREAPS512,
29298 IX86_BUILTIN_STOREDQUDI512,
29299 IX86_BUILTIN_STOREDQUSI512,
29300 IX86_BUILTIN_STOREUPD512,
29301 IX86_BUILTIN_STOREUPS512,
29302 IX86_BUILTIN_SUBPD512,
29303 IX86_BUILTIN_SUBPS512,
29304 IX86_BUILTIN_SUBSD_ROUND,
29305 IX86_BUILTIN_SUBSS_ROUND,
29306 IX86_BUILTIN_UCMPD512,
29307 IX86_BUILTIN_UCMPQ512,
29308 IX86_BUILTIN_UNPCKHPD512,
29309 IX86_BUILTIN_UNPCKHPS512,
29310 IX86_BUILTIN_UNPCKLPD512,
29311 IX86_BUILTIN_UNPCKLPS512,
29312 IX86_BUILTIN_VCVTSD2SI32,
29313 IX86_BUILTIN_VCVTSD2SI64,
29314 IX86_BUILTIN_VCVTSD2USI32,
29315 IX86_BUILTIN_VCVTSD2USI64,
29316 IX86_BUILTIN_VCVTSS2SI32,
29317 IX86_BUILTIN_VCVTSS2SI64,
29318 IX86_BUILTIN_VCVTSS2USI32,
29319 IX86_BUILTIN_VCVTSS2USI64,
29320 IX86_BUILTIN_VCVTTSD2SI32,
29321 IX86_BUILTIN_VCVTTSD2SI64,
29322 IX86_BUILTIN_VCVTTSD2USI32,
29323 IX86_BUILTIN_VCVTTSD2USI64,
29324 IX86_BUILTIN_VCVTTSS2SI32,
29325 IX86_BUILTIN_VCVTTSS2SI64,
29326 IX86_BUILTIN_VCVTTSS2USI32,
29327 IX86_BUILTIN_VCVTTSS2USI64,
29328 IX86_BUILTIN_VFMADDPD512_MASK,
29329 IX86_BUILTIN_VFMADDPD512_MASK3,
29330 IX86_BUILTIN_VFMADDPD512_MASKZ,
29331 IX86_BUILTIN_VFMADDPS512_MASK,
29332 IX86_BUILTIN_VFMADDPS512_MASK3,
29333 IX86_BUILTIN_VFMADDPS512_MASKZ,
29334 IX86_BUILTIN_VFMADDSD3_ROUND,
29335 IX86_BUILTIN_VFMADDSS3_ROUND,
29336 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29337 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29338 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29339 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29340 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29341 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29342 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29343 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29344 IX86_BUILTIN_VFMSUBPD512_MASK3,
29345 IX86_BUILTIN_VFMSUBPS512_MASK3,
29346 IX86_BUILTIN_VFMSUBSD3_MASK3,
29347 IX86_BUILTIN_VFMSUBSS3_MASK3,
29348 IX86_BUILTIN_VFNMADDPD512_MASK,
29349 IX86_BUILTIN_VFNMADDPS512_MASK,
29350 IX86_BUILTIN_VFNMSUBPD512_MASK,
29351 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29352 IX86_BUILTIN_VFNMSUBPS512_MASK,
29353 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29354 IX86_BUILTIN_VPCLZCNTD512,
29355 IX86_BUILTIN_VPCLZCNTQ512,
29356 IX86_BUILTIN_VPCONFLICTD512,
29357 IX86_BUILTIN_VPCONFLICTQ512,
29358 IX86_BUILTIN_VPERMDF512,
29359 IX86_BUILTIN_VPERMDI512,
29360 IX86_BUILTIN_VPERMI2VARD512,
29361 IX86_BUILTIN_VPERMI2VARPD512,
29362 IX86_BUILTIN_VPERMI2VARPS512,
29363 IX86_BUILTIN_VPERMI2VARQ512,
29364 IX86_BUILTIN_VPERMILPD512,
29365 IX86_BUILTIN_VPERMILPS512,
29366 IX86_BUILTIN_VPERMILVARPD512,
29367 IX86_BUILTIN_VPERMILVARPS512,
29368 IX86_BUILTIN_VPERMT2VARD512,
29369 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29370 IX86_BUILTIN_VPERMT2VARPD512,
29371 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29372 IX86_BUILTIN_VPERMT2VARPS512,
29373 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29374 IX86_BUILTIN_VPERMT2VARQ512,
29375 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29376 IX86_BUILTIN_VPERMVARDF512,
29377 IX86_BUILTIN_VPERMVARDI512,
29378 IX86_BUILTIN_VPERMVARSF512,
29379 IX86_BUILTIN_VPERMVARSI512,
29380 IX86_BUILTIN_VTERNLOGD512_MASK,
29381 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29382 IX86_BUILTIN_VTERNLOGQ512_MASK,
29383 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29385 /* Mask arithmetic operations */
29386 IX86_BUILTIN_KAND16,
29387 IX86_BUILTIN_KANDN16,
29388 IX86_BUILTIN_KNOT16,
29389 IX86_BUILTIN_KOR16,
29390 IX86_BUILTIN_KORTESTC16,
29391 IX86_BUILTIN_KORTESTZ16,
29392 IX86_BUILTIN_KUNPCKBW,
29393 IX86_BUILTIN_KXNOR16,
29394 IX86_BUILTIN_KXOR16,
29395 IX86_BUILTIN_KMOV16,
29397 /* AVX512VL. */
29398 IX86_BUILTIN_PMOVUSQD256_MEM,
29399 IX86_BUILTIN_PMOVUSQD128_MEM,
29400 IX86_BUILTIN_PMOVSQD256_MEM,
29401 IX86_BUILTIN_PMOVSQD128_MEM,
29402 IX86_BUILTIN_PMOVQD256_MEM,
29403 IX86_BUILTIN_PMOVQD128_MEM,
29404 IX86_BUILTIN_PMOVUSQW256_MEM,
29405 IX86_BUILTIN_PMOVUSQW128_MEM,
29406 IX86_BUILTIN_PMOVSQW256_MEM,
29407 IX86_BUILTIN_PMOVSQW128_MEM,
29408 IX86_BUILTIN_PMOVQW256_MEM,
29409 IX86_BUILTIN_PMOVQW128_MEM,
29410 IX86_BUILTIN_PMOVUSQB256_MEM,
29411 IX86_BUILTIN_PMOVUSQB128_MEM,
29412 IX86_BUILTIN_PMOVSQB256_MEM,
29413 IX86_BUILTIN_PMOVSQB128_MEM,
29414 IX86_BUILTIN_PMOVQB256_MEM,
29415 IX86_BUILTIN_PMOVQB128_MEM,
29416 IX86_BUILTIN_PMOVUSDW256_MEM,
29417 IX86_BUILTIN_PMOVUSDW128_MEM,
29418 IX86_BUILTIN_PMOVSDW256_MEM,
29419 IX86_BUILTIN_PMOVSDW128_MEM,
29420 IX86_BUILTIN_PMOVDW256_MEM,
29421 IX86_BUILTIN_PMOVDW128_MEM,
29422 IX86_BUILTIN_PMOVUSDB256_MEM,
29423 IX86_BUILTIN_PMOVUSDB128_MEM,
29424 IX86_BUILTIN_PMOVSDB256_MEM,
29425 IX86_BUILTIN_PMOVSDB128_MEM,
29426 IX86_BUILTIN_PMOVDB256_MEM,
29427 IX86_BUILTIN_PMOVDB128_MEM,
29428 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29429 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29430 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29431 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29432 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29433 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29434 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29435 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29436 IX86_BUILTIN_LOADAPD256_MASK,
29437 IX86_BUILTIN_LOADAPD128_MASK,
29438 IX86_BUILTIN_LOADAPS256_MASK,
29439 IX86_BUILTIN_LOADAPS128_MASK,
29440 IX86_BUILTIN_STOREAPD256_MASK,
29441 IX86_BUILTIN_STOREAPD128_MASK,
29442 IX86_BUILTIN_STOREAPS256_MASK,
29443 IX86_BUILTIN_STOREAPS128_MASK,
29444 IX86_BUILTIN_LOADUPD256_MASK,
29445 IX86_BUILTIN_LOADUPD128_MASK,
29446 IX86_BUILTIN_LOADUPS256_MASK,
29447 IX86_BUILTIN_LOADUPS128_MASK,
29448 IX86_BUILTIN_STOREUPD256_MASK,
29449 IX86_BUILTIN_STOREUPD128_MASK,
29450 IX86_BUILTIN_STOREUPS256_MASK,
29451 IX86_BUILTIN_STOREUPS128_MASK,
29452 IX86_BUILTIN_LOADDQUDI256_MASK,
29453 IX86_BUILTIN_LOADDQUDI128_MASK,
29454 IX86_BUILTIN_LOADDQUSI256_MASK,
29455 IX86_BUILTIN_LOADDQUSI128_MASK,
29456 IX86_BUILTIN_LOADDQUHI256_MASK,
29457 IX86_BUILTIN_LOADDQUHI128_MASK,
29458 IX86_BUILTIN_LOADDQUQI256_MASK,
29459 IX86_BUILTIN_LOADDQUQI128_MASK,
29460 IX86_BUILTIN_STOREDQUDI256_MASK,
29461 IX86_BUILTIN_STOREDQUDI128_MASK,
29462 IX86_BUILTIN_STOREDQUSI256_MASK,
29463 IX86_BUILTIN_STOREDQUSI128_MASK,
29464 IX86_BUILTIN_STOREDQUHI256_MASK,
29465 IX86_BUILTIN_STOREDQUHI128_MASK,
29466 IX86_BUILTIN_STOREDQUQI256_MASK,
29467 IX86_BUILTIN_STOREDQUQI128_MASK,
29468 IX86_BUILTIN_COMPRESSPDSTORE256,
29469 IX86_BUILTIN_COMPRESSPDSTORE128,
29470 IX86_BUILTIN_COMPRESSPSSTORE256,
29471 IX86_BUILTIN_COMPRESSPSSTORE128,
29472 IX86_BUILTIN_PCOMPRESSQSTORE256,
29473 IX86_BUILTIN_PCOMPRESSQSTORE128,
29474 IX86_BUILTIN_PCOMPRESSDSTORE256,
29475 IX86_BUILTIN_PCOMPRESSDSTORE128,
29476 IX86_BUILTIN_EXPANDPDLOAD256,
29477 IX86_BUILTIN_EXPANDPDLOAD128,
29478 IX86_BUILTIN_EXPANDPSLOAD256,
29479 IX86_BUILTIN_EXPANDPSLOAD128,
29480 IX86_BUILTIN_PEXPANDQLOAD256,
29481 IX86_BUILTIN_PEXPANDQLOAD128,
29482 IX86_BUILTIN_PEXPANDDLOAD256,
29483 IX86_BUILTIN_PEXPANDDLOAD128,
29484 IX86_BUILTIN_EXPANDPDLOAD256Z,
29485 IX86_BUILTIN_EXPANDPDLOAD128Z,
29486 IX86_BUILTIN_EXPANDPSLOAD256Z,
29487 IX86_BUILTIN_EXPANDPSLOAD128Z,
29488 IX86_BUILTIN_PEXPANDQLOAD256Z,
29489 IX86_BUILTIN_PEXPANDQLOAD128Z,
29490 IX86_BUILTIN_PEXPANDDLOAD256Z,
29491 IX86_BUILTIN_PEXPANDDLOAD128Z,
29492 IX86_BUILTIN_PALIGNR256_MASK,
29493 IX86_BUILTIN_PALIGNR128_MASK,
29494 IX86_BUILTIN_MOVDQA64_256_MASK,
29495 IX86_BUILTIN_MOVDQA64_128_MASK,
29496 IX86_BUILTIN_MOVDQA32_256_MASK,
29497 IX86_BUILTIN_MOVDQA32_128_MASK,
29498 IX86_BUILTIN_MOVAPD256_MASK,
29499 IX86_BUILTIN_MOVAPD128_MASK,
29500 IX86_BUILTIN_MOVAPS256_MASK,
29501 IX86_BUILTIN_MOVAPS128_MASK,
29502 IX86_BUILTIN_MOVDQUHI256_MASK,
29503 IX86_BUILTIN_MOVDQUHI128_MASK,
29504 IX86_BUILTIN_MOVDQUQI256_MASK,
29505 IX86_BUILTIN_MOVDQUQI128_MASK,
29506 IX86_BUILTIN_MINPS128_MASK,
29507 IX86_BUILTIN_MAXPS128_MASK,
29508 IX86_BUILTIN_MINPD128_MASK,
29509 IX86_BUILTIN_MAXPD128_MASK,
29510 IX86_BUILTIN_MAXPD256_MASK,
29511 IX86_BUILTIN_MAXPS256_MASK,
29512 IX86_BUILTIN_MINPD256_MASK,
29513 IX86_BUILTIN_MINPS256_MASK,
29514 IX86_BUILTIN_MULPS128_MASK,
29515 IX86_BUILTIN_DIVPS128_MASK,
29516 IX86_BUILTIN_MULPD128_MASK,
29517 IX86_BUILTIN_DIVPD128_MASK,
29518 IX86_BUILTIN_DIVPD256_MASK,
29519 IX86_BUILTIN_DIVPS256_MASK,
29520 IX86_BUILTIN_MULPD256_MASK,
29521 IX86_BUILTIN_MULPS256_MASK,
29522 IX86_BUILTIN_ADDPD128_MASK,
29523 IX86_BUILTIN_ADDPD256_MASK,
29524 IX86_BUILTIN_ADDPS128_MASK,
29525 IX86_BUILTIN_ADDPS256_MASK,
29526 IX86_BUILTIN_SUBPD128_MASK,
29527 IX86_BUILTIN_SUBPD256_MASK,
29528 IX86_BUILTIN_SUBPS128_MASK,
29529 IX86_BUILTIN_SUBPS256_MASK,
29530 IX86_BUILTIN_XORPD256_MASK,
29531 IX86_BUILTIN_XORPD128_MASK,
29532 IX86_BUILTIN_XORPS256_MASK,
29533 IX86_BUILTIN_XORPS128_MASK,
29534 IX86_BUILTIN_ORPD256_MASK,
29535 IX86_BUILTIN_ORPD128_MASK,
29536 IX86_BUILTIN_ORPS256_MASK,
29537 IX86_BUILTIN_ORPS128_MASK,
29538 IX86_BUILTIN_BROADCASTF32x2_256,
29539 IX86_BUILTIN_BROADCASTI32x2_256,
29540 IX86_BUILTIN_BROADCASTI32x2_128,
29541 IX86_BUILTIN_BROADCASTF64X2_256,
29542 IX86_BUILTIN_BROADCASTI64X2_256,
29543 IX86_BUILTIN_BROADCASTF32X4_256,
29544 IX86_BUILTIN_BROADCASTI32X4_256,
29545 IX86_BUILTIN_EXTRACTF32X4_256,
29546 IX86_BUILTIN_EXTRACTI32X4_256,
29547 IX86_BUILTIN_DBPSADBW256,
29548 IX86_BUILTIN_DBPSADBW128,
29549 IX86_BUILTIN_CVTTPD2QQ256,
29550 IX86_BUILTIN_CVTTPD2QQ128,
29551 IX86_BUILTIN_CVTTPD2UQQ256,
29552 IX86_BUILTIN_CVTTPD2UQQ128,
29553 IX86_BUILTIN_CVTPD2QQ256,
29554 IX86_BUILTIN_CVTPD2QQ128,
29555 IX86_BUILTIN_CVTPD2UQQ256,
29556 IX86_BUILTIN_CVTPD2UQQ128,
29557 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29558 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29559 IX86_BUILTIN_CVTTPS2QQ256,
29560 IX86_BUILTIN_CVTTPS2QQ128,
29561 IX86_BUILTIN_CVTTPS2UQQ256,
29562 IX86_BUILTIN_CVTTPS2UQQ128,
29563 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29564 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29565 IX86_BUILTIN_CVTTPS2UDQ256,
29566 IX86_BUILTIN_CVTTPS2UDQ128,
29567 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29568 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29569 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29570 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29571 IX86_BUILTIN_CVTPD2DQ256_MASK,
29572 IX86_BUILTIN_CVTPD2DQ128_MASK,
29573 IX86_BUILTIN_CVTDQ2PD256_MASK,
29574 IX86_BUILTIN_CVTDQ2PD128_MASK,
29575 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29576 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29577 IX86_BUILTIN_CVTDQ2PS256_MASK,
29578 IX86_BUILTIN_CVTDQ2PS128_MASK,
29579 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29580 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29581 IX86_BUILTIN_CVTPS2PD256_MASK,
29582 IX86_BUILTIN_CVTPS2PD128_MASK,
29583 IX86_BUILTIN_PBROADCASTB256_MASK,
29584 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29585 IX86_BUILTIN_PBROADCASTB128_MASK,
29586 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29587 IX86_BUILTIN_PBROADCASTW256_MASK,
29588 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29589 IX86_BUILTIN_PBROADCASTW128_MASK,
29590 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29591 IX86_BUILTIN_PBROADCASTD256_MASK,
29592 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29593 IX86_BUILTIN_PBROADCASTD128_MASK,
29594 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29595 IX86_BUILTIN_PBROADCASTQ256_MASK,
29596 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29597 IX86_BUILTIN_PBROADCASTQ128_MASK,
29598 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29599 IX86_BUILTIN_BROADCASTSS256,
29600 IX86_BUILTIN_BROADCASTSS128,
29601 IX86_BUILTIN_BROADCASTSD256,
29602 IX86_BUILTIN_EXTRACTF64X2_256,
29603 IX86_BUILTIN_EXTRACTI64X2_256,
29604 IX86_BUILTIN_INSERTF32X4_256,
29605 IX86_BUILTIN_INSERTI32X4_256,
29606 IX86_BUILTIN_PMOVSXBW256_MASK,
29607 IX86_BUILTIN_PMOVSXBW128_MASK,
29608 IX86_BUILTIN_PMOVSXBD256_MASK,
29609 IX86_BUILTIN_PMOVSXBD128_MASK,
29610 IX86_BUILTIN_PMOVSXBQ256_MASK,
29611 IX86_BUILTIN_PMOVSXBQ128_MASK,
29612 IX86_BUILTIN_PMOVSXWD256_MASK,
29613 IX86_BUILTIN_PMOVSXWD128_MASK,
29614 IX86_BUILTIN_PMOVSXWQ256_MASK,
29615 IX86_BUILTIN_PMOVSXWQ128_MASK,
29616 IX86_BUILTIN_PMOVSXDQ256_MASK,
29617 IX86_BUILTIN_PMOVSXDQ128_MASK,
29618 IX86_BUILTIN_PMOVZXBW256_MASK,
29619 IX86_BUILTIN_PMOVZXBW128_MASK,
29620 IX86_BUILTIN_PMOVZXBD256_MASK,
29621 IX86_BUILTIN_PMOVZXBD128_MASK,
29622 IX86_BUILTIN_PMOVZXBQ256_MASK,
29623 IX86_BUILTIN_PMOVZXBQ128_MASK,
29624 IX86_BUILTIN_PMOVZXWD256_MASK,
29625 IX86_BUILTIN_PMOVZXWD128_MASK,
29626 IX86_BUILTIN_PMOVZXWQ256_MASK,
29627 IX86_BUILTIN_PMOVZXWQ128_MASK,
29628 IX86_BUILTIN_PMOVZXDQ256_MASK,
29629 IX86_BUILTIN_PMOVZXDQ128_MASK,
29630 IX86_BUILTIN_REDUCEPD256_MASK,
29631 IX86_BUILTIN_REDUCEPD128_MASK,
29632 IX86_BUILTIN_REDUCEPS256_MASK,
29633 IX86_BUILTIN_REDUCEPS128_MASK,
29634 IX86_BUILTIN_REDUCESD_MASK,
29635 IX86_BUILTIN_REDUCESS_MASK,
29636 IX86_BUILTIN_VPERMVARHI256_MASK,
29637 IX86_BUILTIN_VPERMVARHI128_MASK,
29638 IX86_BUILTIN_VPERMT2VARHI256,
29639 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29640 IX86_BUILTIN_VPERMT2VARHI128,
29641 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29642 IX86_BUILTIN_VPERMI2VARHI256,
29643 IX86_BUILTIN_VPERMI2VARHI128,
29644 IX86_BUILTIN_RCP14PD256,
29645 IX86_BUILTIN_RCP14PD128,
29646 IX86_BUILTIN_RCP14PS256,
29647 IX86_BUILTIN_RCP14PS128,
29648 IX86_BUILTIN_RSQRT14PD256_MASK,
29649 IX86_BUILTIN_RSQRT14PD128_MASK,
29650 IX86_BUILTIN_RSQRT14PS256_MASK,
29651 IX86_BUILTIN_RSQRT14PS128_MASK,
29652 IX86_BUILTIN_SQRTPD256_MASK,
29653 IX86_BUILTIN_SQRTPD128_MASK,
29654 IX86_BUILTIN_SQRTPS256_MASK,
29655 IX86_BUILTIN_SQRTPS128_MASK,
29656 IX86_BUILTIN_PADDB128_MASK,
29657 IX86_BUILTIN_PADDW128_MASK,
29658 IX86_BUILTIN_PADDD128_MASK,
29659 IX86_BUILTIN_PADDQ128_MASK,
29660 IX86_BUILTIN_PSUBB128_MASK,
29661 IX86_BUILTIN_PSUBW128_MASK,
29662 IX86_BUILTIN_PSUBD128_MASK,
29663 IX86_BUILTIN_PSUBQ128_MASK,
29664 IX86_BUILTIN_PADDSB128_MASK,
29665 IX86_BUILTIN_PADDSW128_MASK,
29666 IX86_BUILTIN_PSUBSB128_MASK,
29667 IX86_BUILTIN_PSUBSW128_MASK,
29668 IX86_BUILTIN_PADDUSB128_MASK,
29669 IX86_BUILTIN_PADDUSW128_MASK,
29670 IX86_BUILTIN_PSUBUSB128_MASK,
29671 IX86_BUILTIN_PSUBUSW128_MASK,
29672 IX86_BUILTIN_PADDB256_MASK,
29673 IX86_BUILTIN_PADDW256_MASK,
29674 IX86_BUILTIN_PADDD256_MASK,
29675 IX86_BUILTIN_PADDQ256_MASK,
29676 IX86_BUILTIN_PADDSB256_MASK,
29677 IX86_BUILTIN_PADDSW256_MASK,
29678 IX86_BUILTIN_PADDUSB256_MASK,
29679 IX86_BUILTIN_PADDUSW256_MASK,
29680 IX86_BUILTIN_PSUBB256_MASK,
29681 IX86_BUILTIN_PSUBW256_MASK,
29682 IX86_BUILTIN_PSUBD256_MASK,
29683 IX86_BUILTIN_PSUBQ256_MASK,
29684 IX86_BUILTIN_PSUBSB256_MASK,
29685 IX86_BUILTIN_PSUBSW256_MASK,
29686 IX86_BUILTIN_PSUBUSB256_MASK,
29687 IX86_BUILTIN_PSUBUSW256_MASK,
29688 IX86_BUILTIN_SHUF_F64x2_256,
29689 IX86_BUILTIN_SHUF_I64x2_256,
29690 IX86_BUILTIN_SHUF_I32x4_256,
29691 IX86_BUILTIN_SHUF_F32x4_256,
29692 IX86_BUILTIN_PMOVWB128,
29693 IX86_BUILTIN_PMOVWB256,
29694 IX86_BUILTIN_PMOVSWB128,
29695 IX86_BUILTIN_PMOVSWB256,
29696 IX86_BUILTIN_PMOVUSWB128,
29697 IX86_BUILTIN_PMOVUSWB256,
29698 IX86_BUILTIN_PMOVDB128,
29699 IX86_BUILTIN_PMOVDB256,
29700 IX86_BUILTIN_PMOVSDB128,
29701 IX86_BUILTIN_PMOVSDB256,
29702 IX86_BUILTIN_PMOVUSDB128,
29703 IX86_BUILTIN_PMOVUSDB256,
29704 IX86_BUILTIN_PMOVDW128,
29705 IX86_BUILTIN_PMOVDW256,
29706 IX86_BUILTIN_PMOVSDW128,
29707 IX86_BUILTIN_PMOVSDW256,
29708 IX86_BUILTIN_PMOVUSDW128,
29709 IX86_BUILTIN_PMOVUSDW256,
29710 IX86_BUILTIN_PMOVQB128,
29711 IX86_BUILTIN_PMOVQB256,
29712 IX86_BUILTIN_PMOVSQB128,
29713 IX86_BUILTIN_PMOVSQB256,
29714 IX86_BUILTIN_PMOVUSQB128,
29715 IX86_BUILTIN_PMOVUSQB256,
29716 IX86_BUILTIN_PMOVQW128,
29717 IX86_BUILTIN_PMOVQW256,
29718 IX86_BUILTIN_PMOVSQW128,
29719 IX86_BUILTIN_PMOVSQW256,
29720 IX86_BUILTIN_PMOVUSQW128,
29721 IX86_BUILTIN_PMOVUSQW256,
29722 IX86_BUILTIN_PMOVQD128,
29723 IX86_BUILTIN_PMOVQD256,
29724 IX86_BUILTIN_PMOVSQD128,
29725 IX86_BUILTIN_PMOVSQD256,
29726 IX86_BUILTIN_PMOVUSQD128,
29727 IX86_BUILTIN_PMOVUSQD256,
29728 IX86_BUILTIN_RANGEPD256,
29729 IX86_BUILTIN_RANGEPD128,
29730 IX86_BUILTIN_RANGEPS256,
29731 IX86_BUILTIN_RANGEPS128,
29732 IX86_BUILTIN_GETEXPPS256,
29733 IX86_BUILTIN_GETEXPPD256,
29734 IX86_BUILTIN_GETEXPPS128,
29735 IX86_BUILTIN_GETEXPPD128,
29736 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29737 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29738 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29739 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29740 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29741 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29742 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29743 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29744 IX86_BUILTIN_PABSQ256,
29745 IX86_BUILTIN_PABSQ128,
29746 IX86_BUILTIN_PABSD256_MASK,
29747 IX86_BUILTIN_PABSD128_MASK,
29748 IX86_BUILTIN_PMULHRSW256_MASK,
29749 IX86_BUILTIN_PMULHRSW128_MASK,
29750 IX86_BUILTIN_PMULHUW128_MASK,
29751 IX86_BUILTIN_PMULHUW256_MASK,
29752 IX86_BUILTIN_PMULHW256_MASK,
29753 IX86_BUILTIN_PMULHW128_MASK,
29754 IX86_BUILTIN_PMULLW256_MASK,
29755 IX86_BUILTIN_PMULLW128_MASK,
29756 IX86_BUILTIN_PMULLQ256,
29757 IX86_BUILTIN_PMULLQ128,
29758 IX86_BUILTIN_ANDPD256_MASK,
29759 IX86_BUILTIN_ANDPD128_MASK,
29760 IX86_BUILTIN_ANDPS256_MASK,
29761 IX86_BUILTIN_ANDPS128_MASK,
29762 IX86_BUILTIN_ANDNPD256_MASK,
29763 IX86_BUILTIN_ANDNPD128_MASK,
29764 IX86_BUILTIN_ANDNPS256_MASK,
29765 IX86_BUILTIN_ANDNPS128_MASK,
29766 IX86_BUILTIN_PSLLWI128_MASK,
29767 IX86_BUILTIN_PSLLDI128_MASK,
29768 IX86_BUILTIN_PSLLQI128_MASK,
29769 IX86_BUILTIN_PSLLW128_MASK,
29770 IX86_BUILTIN_PSLLD128_MASK,
29771 IX86_BUILTIN_PSLLQ128_MASK,
29772 IX86_BUILTIN_PSLLWI256_MASK,
29773 IX86_BUILTIN_PSLLW256_MASK,
29774 IX86_BUILTIN_PSLLDI256_MASK,
29775 IX86_BUILTIN_PSLLD256_MASK,
29776 IX86_BUILTIN_PSLLQI256_MASK,
29777 IX86_BUILTIN_PSLLQ256_MASK,
29778 IX86_BUILTIN_PSRADI128_MASK,
29779 IX86_BUILTIN_PSRAD128_MASK,
29780 IX86_BUILTIN_PSRADI256_MASK,
29781 IX86_BUILTIN_PSRAD256_MASK,
29782 IX86_BUILTIN_PSRAQI128_MASK,
29783 IX86_BUILTIN_PSRAQ128_MASK,
29784 IX86_BUILTIN_PSRAQI256_MASK,
29785 IX86_BUILTIN_PSRAQ256_MASK,
29786 IX86_BUILTIN_PANDD256,
29787 IX86_BUILTIN_PANDD128,
29788 IX86_BUILTIN_PSRLDI128_MASK,
29789 IX86_BUILTIN_PSRLD128_MASK,
29790 IX86_BUILTIN_PSRLDI256_MASK,
29791 IX86_BUILTIN_PSRLD256_MASK,
29792 IX86_BUILTIN_PSRLQI128_MASK,
29793 IX86_BUILTIN_PSRLQ128_MASK,
29794 IX86_BUILTIN_PSRLQI256_MASK,
29795 IX86_BUILTIN_PSRLQ256_MASK,
29796 IX86_BUILTIN_PANDQ256,
29797 IX86_BUILTIN_PANDQ128,
29798 IX86_BUILTIN_PANDND256,
29799 IX86_BUILTIN_PANDND128,
29800 IX86_BUILTIN_PANDNQ256,
29801 IX86_BUILTIN_PANDNQ128,
29802 IX86_BUILTIN_PORD256,
29803 IX86_BUILTIN_PORD128,
29804 IX86_BUILTIN_PORQ256,
29805 IX86_BUILTIN_PORQ128,
29806 IX86_BUILTIN_PXORD256,
29807 IX86_BUILTIN_PXORD128,
29808 IX86_BUILTIN_PXORQ256,
29809 IX86_BUILTIN_PXORQ128,
29810 IX86_BUILTIN_PACKSSWB256_MASK,
29811 IX86_BUILTIN_PACKSSWB128_MASK,
29812 IX86_BUILTIN_PACKUSWB256_MASK,
29813 IX86_BUILTIN_PACKUSWB128_MASK,
29814 IX86_BUILTIN_RNDSCALEPS256,
29815 IX86_BUILTIN_RNDSCALEPD256,
29816 IX86_BUILTIN_RNDSCALEPS128,
29817 IX86_BUILTIN_RNDSCALEPD128,
29818 IX86_BUILTIN_VTERNLOGQ256_MASK,
29819 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29820 IX86_BUILTIN_VTERNLOGD256_MASK,
29821 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29822 IX86_BUILTIN_VTERNLOGQ128_MASK,
29823 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29824 IX86_BUILTIN_VTERNLOGD128_MASK,
29825 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29826 IX86_BUILTIN_SCALEFPD256,
29827 IX86_BUILTIN_SCALEFPS256,
29828 IX86_BUILTIN_SCALEFPD128,
29829 IX86_BUILTIN_SCALEFPS128,
29830 IX86_BUILTIN_VFMADDPD256_MASK,
29831 IX86_BUILTIN_VFMADDPD256_MASK3,
29832 IX86_BUILTIN_VFMADDPD256_MASKZ,
29833 IX86_BUILTIN_VFMADDPD128_MASK,
29834 IX86_BUILTIN_VFMADDPD128_MASK3,
29835 IX86_BUILTIN_VFMADDPD128_MASKZ,
29836 IX86_BUILTIN_VFMADDPS256_MASK,
29837 IX86_BUILTIN_VFMADDPS256_MASK3,
29838 IX86_BUILTIN_VFMADDPS256_MASKZ,
29839 IX86_BUILTIN_VFMADDPS128_MASK,
29840 IX86_BUILTIN_VFMADDPS128_MASK3,
29841 IX86_BUILTIN_VFMADDPS128_MASKZ,
29842 IX86_BUILTIN_VFMSUBPD256_MASK3,
29843 IX86_BUILTIN_VFMSUBPD128_MASK3,
29844 IX86_BUILTIN_VFMSUBPS256_MASK3,
29845 IX86_BUILTIN_VFMSUBPS128_MASK3,
29846 IX86_BUILTIN_VFNMADDPD256_MASK,
29847 IX86_BUILTIN_VFNMADDPD128_MASK,
29848 IX86_BUILTIN_VFNMADDPS256_MASK,
29849 IX86_BUILTIN_VFNMADDPS128_MASK,
29850 IX86_BUILTIN_VFNMSUBPD256_MASK,
29851 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29852 IX86_BUILTIN_VFNMSUBPD128_MASK,
29853 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29854 IX86_BUILTIN_VFNMSUBPS256_MASK,
29855 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29856 IX86_BUILTIN_VFNMSUBPS128_MASK,
29857 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29858 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29859 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29860 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29861 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29862 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29863 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29864 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29865 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29866 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29867 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29868 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29869 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29870 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29871 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29872 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29873 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29874 IX86_BUILTIN_INSERTF64X2_256,
29875 IX86_BUILTIN_INSERTI64X2_256,
29876 IX86_BUILTIN_PSRAVV16HI,
29877 IX86_BUILTIN_PSRAVV8HI,
29878 IX86_BUILTIN_PMADDUBSW256_MASK,
29879 IX86_BUILTIN_PMADDUBSW128_MASK,
29880 IX86_BUILTIN_PMADDWD256_MASK,
29881 IX86_BUILTIN_PMADDWD128_MASK,
29882 IX86_BUILTIN_PSRLVV16HI,
29883 IX86_BUILTIN_PSRLVV8HI,
29884 IX86_BUILTIN_CVTPS2DQ256_MASK,
29885 IX86_BUILTIN_CVTPS2DQ128_MASK,
29886 IX86_BUILTIN_CVTPS2UDQ256,
29887 IX86_BUILTIN_CVTPS2UDQ128,
29888 IX86_BUILTIN_CVTPS2QQ256,
29889 IX86_BUILTIN_CVTPS2QQ128,
29890 IX86_BUILTIN_CVTPS2UQQ256,
29891 IX86_BUILTIN_CVTPS2UQQ128,
29892 IX86_BUILTIN_GETMANTPS256,
29893 IX86_BUILTIN_GETMANTPS128,
29894 IX86_BUILTIN_GETMANTPD256,
29895 IX86_BUILTIN_GETMANTPD128,
29896 IX86_BUILTIN_MOVDDUP256_MASK,
29897 IX86_BUILTIN_MOVDDUP128_MASK,
29898 IX86_BUILTIN_MOVSHDUP256_MASK,
29899 IX86_BUILTIN_MOVSHDUP128_MASK,
29900 IX86_BUILTIN_MOVSLDUP256_MASK,
29901 IX86_BUILTIN_MOVSLDUP128_MASK,
29902 IX86_BUILTIN_CVTQQ2PS256,
29903 IX86_BUILTIN_CVTQQ2PS128,
29904 IX86_BUILTIN_CVTUQQ2PS256,
29905 IX86_BUILTIN_CVTUQQ2PS128,
29906 IX86_BUILTIN_CVTQQ2PD256,
29907 IX86_BUILTIN_CVTQQ2PD128,
29908 IX86_BUILTIN_CVTUQQ2PD256,
29909 IX86_BUILTIN_CVTUQQ2PD128,
29910 IX86_BUILTIN_VPERMT2VARQ256,
29911 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29912 IX86_BUILTIN_VPERMT2VARD256,
29913 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29914 IX86_BUILTIN_VPERMI2VARQ256,
29915 IX86_BUILTIN_VPERMI2VARD256,
29916 IX86_BUILTIN_VPERMT2VARPD256,
29917 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29918 IX86_BUILTIN_VPERMT2VARPS256,
29919 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29920 IX86_BUILTIN_VPERMI2VARPD256,
29921 IX86_BUILTIN_VPERMI2VARPS256,
29922 IX86_BUILTIN_VPERMT2VARQ128,
29923 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29924 IX86_BUILTIN_VPERMT2VARD128,
29925 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29926 IX86_BUILTIN_VPERMI2VARQ128,
29927 IX86_BUILTIN_VPERMI2VARD128,
29928 IX86_BUILTIN_VPERMT2VARPD128,
29929 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29930 IX86_BUILTIN_VPERMT2VARPS128,
29931 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29932 IX86_BUILTIN_VPERMI2VARPD128,
29933 IX86_BUILTIN_VPERMI2VARPS128,
29934 IX86_BUILTIN_PSHUFB256_MASK,
29935 IX86_BUILTIN_PSHUFB128_MASK,
29936 IX86_BUILTIN_PSHUFHW256_MASK,
29937 IX86_BUILTIN_PSHUFHW128_MASK,
29938 IX86_BUILTIN_PSHUFLW256_MASK,
29939 IX86_BUILTIN_PSHUFLW128_MASK,
29940 IX86_BUILTIN_PSHUFD256_MASK,
29941 IX86_BUILTIN_PSHUFD128_MASK,
29942 IX86_BUILTIN_SHUFPD256_MASK,
29943 IX86_BUILTIN_SHUFPD128_MASK,
29944 IX86_BUILTIN_SHUFPS256_MASK,
29945 IX86_BUILTIN_SHUFPS128_MASK,
29946 IX86_BUILTIN_PROLVQ256,
29947 IX86_BUILTIN_PROLVQ128,
29948 IX86_BUILTIN_PROLQ256,
29949 IX86_BUILTIN_PROLQ128,
29950 IX86_BUILTIN_PRORVQ256,
29951 IX86_BUILTIN_PRORVQ128,
29952 IX86_BUILTIN_PRORQ256,
29953 IX86_BUILTIN_PRORQ128,
29954 IX86_BUILTIN_PSRAVQ128,
29955 IX86_BUILTIN_PSRAVQ256,
29956 IX86_BUILTIN_PSLLVV4DI_MASK,
29957 IX86_BUILTIN_PSLLVV2DI_MASK,
29958 IX86_BUILTIN_PSLLVV8SI_MASK,
29959 IX86_BUILTIN_PSLLVV4SI_MASK,
29960 IX86_BUILTIN_PSRAVV8SI_MASK,
29961 IX86_BUILTIN_PSRAVV4SI_MASK,
29962 IX86_BUILTIN_PSRLVV4DI_MASK,
29963 IX86_BUILTIN_PSRLVV2DI_MASK,
29964 IX86_BUILTIN_PSRLVV8SI_MASK,
29965 IX86_BUILTIN_PSRLVV4SI_MASK,
29966 IX86_BUILTIN_PSRAWI256_MASK,
29967 IX86_BUILTIN_PSRAW256_MASK,
29968 IX86_BUILTIN_PSRAWI128_MASK,
29969 IX86_BUILTIN_PSRAW128_MASK,
29970 IX86_BUILTIN_PSRLWI256_MASK,
29971 IX86_BUILTIN_PSRLW256_MASK,
29972 IX86_BUILTIN_PSRLWI128_MASK,
29973 IX86_BUILTIN_PSRLW128_MASK,
29974 IX86_BUILTIN_PRORVD256,
29975 IX86_BUILTIN_PROLVD256,
29976 IX86_BUILTIN_PRORD256,
29977 IX86_BUILTIN_PROLD256,
29978 IX86_BUILTIN_PRORVD128,
29979 IX86_BUILTIN_PROLVD128,
29980 IX86_BUILTIN_PRORD128,
29981 IX86_BUILTIN_PROLD128,
29982 IX86_BUILTIN_FPCLASSPD256,
29983 IX86_BUILTIN_FPCLASSPD128,
29984 IX86_BUILTIN_FPCLASSSD,
29985 IX86_BUILTIN_FPCLASSPS256,
29986 IX86_BUILTIN_FPCLASSPS128,
29987 IX86_BUILTIN_FPCLASSSS,
29988 IX86_BUILTIN_CVTB2MASK128,
29989 IX86_BUILTIN_CVTB2MASK256,
29990 IX86_BUILTIN_CVTW2MASK128,
29991 IX86_BUILTIN_CVTW2MASK256,
29992 IX86_BUILTIN_CVTD2MASK128,
29993 IX86_BUILTIN_CVTD2MASK256,
29994 IX86_BUILTIN_CVTQ2MASK128,
29995 IX86_BUILTIN_CVTQ2MASK256,
29996 IX86_BUILTIN_CVTMASK2B128,
29997 IX86_BUILTIN_CVTMASK2B256,
29998 IX86_BUILTIN_CVTMASK2W128,
29999 IX86_BUILTIN_CVTMASK2W256,
30000 IX86_BUILTIN_CVTMASK2D128,
30001 IX86_BUILTIN_CVTMASK2D256,
30002 IX86_BUILTIN_CVTMASK2Q128,
30003 IX86_BUILTIN_CVTMASK2Q256,
30004 IX86_BUILTIN_PCMPEQB128_MASK,
30005 IX86_BUILTIN_PCMPEQB256_MASK,
30006 IX86_BUILTIN_PCMPEQW128_MASK,
30007 IX86_BUILTIN_PCMPEQW256_MASK,
30008 IX86_BUILTIN_PCMPEQD128_MASK,
30009 IX86_BUILTIN_PCMPEQD256_MASK,
30010 IX86_BUILTIN_PCMPEQQ128_MASK,
30011 IX86_BUILTIN_PCMPEQQ256_MASK,
30012 IX86_BUILTIN_PCMPGTB128_MASK,
30013 IX86_BUILTIN_PCMPGTB256_MASK,
30014 IX86_BUILTIN_PCMPGTW128_MASK,
30015 IX86_BUILTIN_PCMPGTW256_MASK,
30016 IX86_BUILTIN_PCMPGTD128_MASK,
30017 IX86_BUILTIN_PCMPGTD256_MASK,
30018 IX86_BUILTIN_PCMPGTQ128_MASK,
30019 IX86_BUILTIN_PCMPGTQ256_MASK,
30020 IX86_BUILTIN_PTESTMB128,
30021 IX86_BUILTIN_PTESTMB256,
30022 IX86_BUILTIN_PTESTMW128,
30023 IX86_BUILTIN_PTESTMW256,
30024 IX86_BUILTIN_PTESTMD128,
30025 IX86_BUILTIN_PTESTMD256,
30026 IX86_BUILTIN_PTESTMQ128,
30027 IX86_BUILTIN_PTESTMQ256,
30028 IX86_BUILTIN_PTESTNMB128,
30029 IX86_BUILTIN_PTESTNMB256,
30030 IX86_BUILTIN_PTESTNMW128,
30031 IX86_BUILTIN_PTESTNMW256,
30032 IX86_BUILTIN_PTESTNMD128,
30033 IX86_BUILTIN_PTESTNMD256,
30034 IX86_BUILTIN_PTESTNMQ128,
30035 IX86_BUILTIN_PTESTNMQ256,
30036 IX86_BUILTIN_PBROADCASTMB128,
30037 IX86_BUILTIN_PBROADCASTMB256,
30038 IX86_BUILTIN_PBROADCASTMW128,
30039 IX86_BUILTIN_PBROADCASTMW256,
30040 IX86_BUILTIN_COMPRESSPD256,
30041 IX86_BUILTIN_COMPRESSPD128,
30042 IX86_BUILTIN_COMPRESSPS256,
30043 IX86_BUILTIN_COMPRESSPS128,
30044 IX86_BUILTIN_PCOMPRESSQ256,
30045 IX86_BUILTIN_PCOMPRESSQ128,
30046 IX86_BUILTIN_PCOMPRESSD256,
30047 IX86_BUILTIN_PCOMPRESSD128,
30048 IX86_BUILTIN_EXPANDPD256,
30049 IX86_BUILTIN_EXPANDPD128,
30050 IX86_BUILTIN_EXPANDPS256,
30051 IX86_BUILTIN_EXPANDPS128,
30052 IX86_BUILTIN_PEXPANDQ256,
30053 IX86_BUILTIN_PEXPANDQ128,
30054 IX86_BUILTIN_PEXPANDD256,
30055 IX86_BUILTIN_PEXPANDD128,
30056 IX86_BUILTIN_EXPANDPD256Z,
30057 IX86_BUILTIN_EXPANDPD128Z,
30058 IX86_BUILTIN_EXPANDPS256Z,
30059 IX86_BUILTIN_EXPANDPS128Z,
30060 IX86_BUILTIN_PEXPANDQ256Z,
30061 IX86_BUILTIN_PEXPANDQ128Z,
30062 IX86_BUILTIN_PEXPANDD256Z,
30063 IX86_BUILTIN_PEXPANDD128Z,
30064 IX86_BUILTIN_PMAXSD256_MASK,
30065 IX86_BUILTIN_PMINSD256_MASK,
30066 IX86_BUILTIN_PMAXUD256_MASK,
30067 IX86_BUILTIN_PMINUD256_MASK,
30068 IX86_BUILTIN_PMAXSD128_MASK,
30069 IX86_BUILTIN_PMINSD128_MASK,
30070 IX86_BUILTIN_PMAXUD128_MASK,
30071 IX86_BUILTIN_PMINUD128_MASK,
30072 IX86_BUILTIN_PMAXSQ256_MASK,
30073 IX86_BUILTIN_PMINSQ256_MASK,
30074 IX86_BUILTIN_PMAXUQ256_MASK,
30075 IX86_BUILTIN_PMINUQ256_MASK,
30076 IX86_BUILTIN_PMAXSQ128_MASK,
30077 IX86_BUILTIN_PMINSQ128_MASK,
30078 IX86_BUILTIN_PMAXUQ128_MASK,
30079 IX86_BUILTIN_PMINUQ128_MASK,
30080 IX86_BUILTIN_PMINSB256_MASK,
30081 IX86_BUILTIN_PMINUB256_MASK,
30082 IX86_BUILTIN_PMAXSB256_MASK,
30083 IX86_BUILTIN_PMAXUB256_MASK,
30084 IX86_BUILTIN_PMINSB128_MASK,
30085 IX86_BUILTIN_PMINUB128_MASK,
30086 IX86_BUILTIN_PMAXSB128_MASK,
30087 IX86_BUILTIN_PMAXUB128_MASK,
30088 IX86_BUILTIN_PMINSW256_MASK,
30089 IX86_BUILTIN_PMINUW256_MASK,
30090 IX86_BUILTIN_PMAXSW256_MASK,
30091 IX86_BUILTIN_PMAXUW256_MASK,
30092 IX86_BUILTIN_PMINSW128_MASK,
30093 IX86_BUILTIN_PMINUW128_MASK,
30094 IX86_BUILTIN_PMAXSW128_MASK,
30095 IX86_BUILTIN_PMAXUW128_MASK,
30096 IX86_BUILTIN_VPCONFLICTQ256,
30097 IX86_BUILTIN_VPCONFLICTD256,
30098 IX86_BUILTIN_VPCLZCNTQ256,
30099 IX86_BUILTIN_VPCLZCNTD256,
30100 IX86_BUILTIN_UNPCKHPD256_MASK,
30101 IX86_BUILTIN_UNPCKHPD128_MASK,
30102 IX86_BUILTIN_UNPCKHPS256_MASK,
30103 IX86_BUILTIN_UNPCKHPS128_MASK,
30104 IX86_BUILTIN_UNPCKLPD256_MASK,
30105 IX86_BUILTIN_UNPCKLPD128_MASK,
30106 IX86_BUILTIN_UNPCKLPS256_MASK,
30107 IX86_BUILTIN_VPCONFLICTQ128,
30108 IX86_BUILTIN_VPCONFLICTD128,
30109 IX86_BUILTIN_VPCLZCNTQ128,
30110 IX86_BUILTIN_VPCLZCNTD128,
30111 IX86_BUILTIN_UNPCKLPS128_MASK,
30112 IX86_BUILTIN_ALIGND256,
30113 IX86_BUILTIN_ALIGNQ256,
30114 IX86_BUILTIN_ALIGND128,
30115 IX86_BUILTIN_ALIGNQ128,
30116 IX86_BUILTIN_CVTPS2PH256_MASK,
30117 IX86_BUILTIN_CVTPS2PH_MASK,
30118 IX86_BUILTIN_CVTPH2PS_MASK,
30119 IX86_BUILTIN_CVTPH2PS256_MASK,
30120 IX86_BUILTIN_PUNPCKHDQ128_MASK,
30121 IX86_BUILTIN_PUNPCKHDQ256_MASK,
30122 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
30123 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
30124 IX86_BUILTIN_PUNPCKLDQ128_MASK,
30125 IX86_BUILTIN_PUNPCKLDQ256_MASK,
30126 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
30127 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
30128 IX86_BUILTIN_PUNPCKHBW128_MASK,
30129 IX86_BUILTIN_PUNPCKHBW256_MASK,
30130 IX86_BUILTIN_PUNPCKHWD128_MASK,
30131 IX86_BUILTIN_PUNPCKHWD256_MASK,
30132 IX86_BUILTIN_PUNPCKLBW128_MASK,
30133 IX86_BUILTIN_PUNPCKLBW256_MASK,
30134 IX86_BUILTIN_PUNPCKLWD128_MASK,
30135 IX86_BUILTIN_PUNPCKLWD256_MASK,
30136 IX86_BUILTIN_PSLLVV16HI,
30137 IX86_BUILTIN_PSLLVV8HI,
30138 IX86_BUILTIN_PACKSSDW256_MASK,
30139 IX86_BUILTIN_PACKSSDW128_MASK,
30140 IX86_BUILTIN_PACKUSDW256_MASK,
30141 IX86_BUILTIN_PACKUSDW128_MASK,
30142 IX86_BUILTIN_PAVGB256_MASK,
30143 IX86_BUILTIN_PAVGW256_MASK,
30144 IX86_BUILTIN_PAVGB128_MASK,
30145 IX86_BUILTIN_PAVGW128_MASK,
30146 IX86_BUILTIN_VPERMVARSF256_MASK,
30147 IX86_BUILTIN_VPERMVARDF256_MASK,
30148 IX86_BUILTIN_VPERMDF256_MASK,
30149 IX86_BUILTIN_PABSB256_MASK,
30150 IX86_BUILTIN_PABSB128_MASK,
30151 IX86_BUILTIN_PABSW256_MASK,
30152 IX86_BUILTIN_PABSW128_MASK,
30153 IX86_BUILTIN_VPERMILVARPD_MASK,
30154 IX86_BUILTIN_VPERMILVARPS_MASK,
30155 IX86_BUILTIN_VPERMILVARPD256_MASK,
30156 IX86_BUILTIN_VPERMILVARPS256_MASK,
30157 IX86_BUILTIN_VPERMILPD_MASK,
30158 IX86_BUILTIN_VPERMILPS_MASK,
30159 IX86_BUILTIN_VPERMILPD256_MASK,
30160 IX86_BUILTIN_VPERMILPS256_MASK,
30161 IX86_BUILTIN_BLENDMQ256,
30162 IX86_BUILTIN_BLENDMD256,
30163 IX86_BUILTIN_BLENDMPD256,
30164 IX86_BUILTIN_BLENDMPS256,
30165 IX86_BUILTIN_BLENDMQ128,
30166 IX86_BUILTIN_BLENDMD128,
30167 IX86_BUILTIN_BLENDMPD128,
30168 IX86_BUILTIN_BLENDMPS128,
30169 IX86_BUILTIN_BLENDMW256,
30170 IX86_BUILTIN_BLENDMB256,
30171 IX86_BUILTIN_BLENDMW128,
30172 IX86_BUILTIN_BLENDMB128,
30173 IX86_BUILTIN_PMULLD256_MASK,
30174 IX86_BUILTIN_PMULLD128_MASK,
30175 IX86_BUILTIN_PMULUDQ256_MASK,
30176 IX86_BUILTIN_PMULDQ256_MASK,
30177 IX86_BUILTIN_PMULDQ128_MASK,
30178 IX86_BUILTIN_PMULUDQ128_MASK,
30179 IX86_BUILTIN_CVTPD2PS256_MASK,
30180 IX86_BUILTIN_CVTPD2PS_MASK,
30181 IX86_BUILTIN_VPERMVARSI256_MASK,
30182 IX86_BUILTIN_VPERMVARDI256_MASK,
30183 IX86_BUILTIN_VPERMDI256_MASK,
30184 IX86_BUILTIN_CMPQ256,
30185 IX86_BUILTIN_CMPD256,
30186 IX86_BUILTIN_UCMPQ256,
30187 IX86_BUILTIN_UCMPD256,
30188 IX86_BUILTIN_CMPB256,
30189 IX86_BUILTIN_CMPW256,
30190 IX86_BUILTIN_UCMPB256,
30191 IX86_BUILTIN_UCMPW256,
30192 IX86_BUILTIN_CMPPD256_MASK,
30193 IX86_BUILTIN_CMPPS256_MASK,
30194 IX86_BUILTIN_CMPQ128,
30195 IX86_BUILTIN_CMPD128,
30196 IX86_BUILTIN_UCMPQ128,
30197 IX86_BUILTIN_UCMPD128,
30198 IX86_BUILTIN_CMPB128,
30199 IX86_BUILTIN_CMPW128,
30200 IX86_BUILTIN_UCMPB128,
30201 IX86_BUILTIN_UCMPW128,
30202 IX86_BUILTIN_CMPPD128_MASK,
30203 IX86_BUILTIN_CMPPS128_MASK,
30205 IX86_BUILTIN_GATHER3SIV8SF,
30206 IX86_BUILTIN_GATHER3SIV4SF,
30207 IX86_BUILTIN_GATHER3SIV4DF,
30208 IX86_BUILTIN_GATHER3SIV2DF,
30209 IX86_BUILTIN_GATHER3DIV8SF,
30210 IX86_BUILTIN_GATHER3DIV4SF,
30211 IX86_BUILTIN_GATHER3DIV4DF,
30212 IX86_BUILTIN_GATHER3DIV2DF,
30213 IX86_BUILTIN_GATHER3SIV8SI,
30214 IX86_BUILTIN_GATHER3SIV4SI,
30215 IX86_BUILTIN_GATHER3SIV4DI,
30216 IX86_BUILTIN_GATHER3SIV2DI,
30217 IX86_BUILTIN_GATHER3DIV8SI,
30218 IX86_BUILTIN_GATHER3DIV4SI,
30219 IX86_BUILTIN_GATHER3DIV4DI,
30220 IX86_BUILTIN_GATHER3DIV2DI,
30221 IX86_BUILTIN_SCATTERSIV8SF,
30222 IX86_BUILTIN_SCATTERSIV4SF,
30223 IX86_BUILTIN_SCATTERSIV4DF,
30224 IX86_BUILTIN_SCATTERSIV2DF,
30225 IX86_BUILTIN_SCATTERDIV8SF,
30226 IX86_BUILTIN_SCATTERDIV4SF,
30227 IX86_BUILTIN_SCATTERDIV4DF,
30228 IX86_BUILTIN_SCATTERDIV2DF,
30229 IX86_BUILTIN_SCATTERSIV8SI,
30230 IX86_BUILTIN_SCATTERSIV4SI,
30231 IX86_BUILTIN_SCATTERSIV4DI,
30232 IX86_BUILTIN_SCATTERSIV2DI,
30233 IX86_BUILTIN_SCATTERDIV8SI,
30234 IX86_BUILTIN_SCATTERDIV4SI,
30235 IX86_BUILTIN_SCATTERDIV4DI,
30236 IX86_BUILTIN_SCATTERDIV2DI,
30238 /* AVX512DQ. */
30239 IX86_BUILTIN_RANGESD128,
30240 IX86_BUILTIN_RANGESS128,
30241 IX86_BUILTIN_KUNPCKWD,
30242 IX86_BUILTIN_KUNPCKDQ,
30243 IX86_BUILTIN_BROADCASTF32x2_512,
30244 IX86_BUILTIN_BROADCASTI32x2_512,
30245 IX86_BUILTIN_BROADCASTF64X2_512,
30246 IX86_BUILTIN_BROADCASTI64X2_512,
30247 IX86_BUILTIN_BROADCASTF32X8_512,
30248 IX86_BUILTIN_BROADCASTI32X8_512,
30249 IX86_BUILTIN_EXTRACTF64X2_512,
30250 IX86_BUILTIN_EXTRACTF32X8,
30251 IX86_BUILTIN_EXTRACTI64X2_512,
30252 IX86_BUILTIN_EXTRACTI32X8,
30253 IX86_BUILTIN_REDUCEPD512_MASK,
30254 IX86_BUILTIN_REDUCEPS512_MASK,
30255 IX86_BUILTIN_PMULLQ512,
30256 IX86_BUILTIN_XORPD512,
30257 IX86_BUILTIN_XORPS512,
30258 IX86_BUILTIN_ORPD512,
30259 IX86_BUILTIN_ORPS512,
30260 IX86_BUILTIN_ANDPD512,
30261 IX86_BUILTIN_ANDPS512,
30262 IX86_BUILTIN_ANDNPD512,
30263 IX86_BUILTIN_ANDNPS512,
30264 IX86_BUILTIN_INSERTF32X8,
30265 IX86_BUILTIN_INSERTI32X8,
30266 IX86_BUILTIN_INSERTF64X2_512,
30267 IX86_BUILTIN_INSERTI64X2_512,
30268 IX86_BUILTIN_FPCLASSPD512,
30269 IX86_BUILTIN_FPCLASSPS512,
30270 IX86_BUILTIN_CVTD2MASK512,
30271 IX86_BUILTIN_CVTQ2MASK512,
30272 IX86_BUILTIN_CVTMASK2D512,
30273 IX86_BUILTIN_CVTMASK2Q512,
30274 IX86_BUILTIN_CVTPD2QQ512,
30275 IX86_BUILTIN_CVTPS2QQ512,
30276 IX86_BUILTIN_CVTPD2UQQ512,
30277 IX86_BUILTIN_CVTPS2UQQ512,
30278 IX86_BUILTIN_CVTQQ2PS512,
30279 IX86_BUILTIN_CVTUQQ2PS512,
30280 IX86_BUILTIN_CVTQQ2PD512,
30281 IX86_BUILTIN_CVTUQQ2PD512,
30282 IX86_BUILTIN_CVTTPS2QQ512,
30283 IX86_BUILTIN_CVTTPS2UQQ512,
30284 IX86_BUILTIN_CVTTPD2QQ512,
30285 IX86_BUILTIN_CVTTPD2UQQ512,
30286 IX86_BUILTIN_RANGEPS512,
30287 IX86_BUILTIN_RANGEPD512,
30289 /* AVX512BW. */
30290 IX86_BUILTIN_PACKUSDW512,
30291 IX86_BUILTIN_PACKSSDW512,
30292 IX86_BUILTIN_LOADDQUHI512_MASK,
30293 IX86_BUILTIN_LOADDQUQI512_MASK,
30294 IX86_BUILTIN_PSLLDQ512,
30295 IX86_BUILTIN_PSRLDQ512,
30296 IX86_BUILTIN_STOREDQUHI512_MASK,
30297 IX86_BUILTIN_STOREDQUQI512_MASK,
30298 IX86_BUILTIN_PALIGNR512,
30299 IX86_BUILTIN_PALIGNR512_MASK,
30300 IX86_BUILTIN_MOVDQUHI512_MASK,
30301 IX86_BUILTIN_MOVDQUQI512_MASK,
30302 IX86_BUILTIN_PSADBW512,
30303 IX86_BUILTIN_DBPSADBW512,
30304 IX86_BUILTIN_PBROADCASTB512,
30305 IX86_BUILTIN_PBROADCASTB512_GPR,
30306 IX86_BUILTIN_PBROADCASTW512,
30307 IX86_BUILTIN_PBROADCASTW512_GPR,
30308 IX86_BUILTIN_PMOVSXBW512_MASK,
30309 IX86_BUILTIN_PMOVZXBW512_MASK,
30310 IX86_BUILTIN_VPERMVARHI512_MASK,
30311 IX86_BUILTIN_VPERMT2VARHI512,
30312 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30313 IX86_BUILTIN_VPERMI2VARHI512,
30314 IX86_BUILTIN_PAVGB512,
30315 IX86_BUILTIN_PAVGW512,
30316 IX86_BUILTIN_PADDB512,
30317 IX86_BUILTIN_PSUBB512,
30318 IX86_BUILTIN_PSUBSB512,
30319 IX86_BUILTIN_PADDSB512,
30320 IX86_BUILTIN_PSUBUSB512,
30321 IX86_BUILTIN_PADDUSB512,
30322 IX86_BUILTIN_PSUBW512,
30323 IX86_BUILTIN_PADDW512,
30324 IX86_BUILTIN_PSUBSW512,
30325 IX86_BUILTIN_PADDSW512,
30326 IX86_BUILTIN_PSUBUSW512,
30327 IX86_BUILTIN_PADDUSW512,
30328 IX86_BUILTIN_PMAXUW512,
30329 IX86_BUILTIN_PMAXSW512,
30330 IX86_BUILTIN_PMINUW512,
30331 IX86_BUILTIN_PMINSW512,
30332 IX86_BUILTIN_PMAXUB512,
30333 IX86_BUILTIN_PMAXSB512,
30334 IX86_BUILTIN_PMINUB512,
30335 IX86_BUILTIN_PMINSB512,
30336 IX86_BUILTIN_PMOVWB512,
30337 IX86_BUILTIN_PMOVSWB512,
30338 IX86_BUILTIN_PMOVUSWB512,
30339 IX86_BUILTIN_PMULHRSW512_MASK,
30340 IX86_BUILTIN_PMULHUW512_MASK,
30341 IX86_BUILTIN_PMULHW512_MASK,
30342 IX86_BUILTIN_PMULLW512_MASK,
30343 IX86_BUILTIN_PSLLWI512_MASK,
30344 IX86_BUILTIN_PSLLW512_MASK,
30345 IX86_BUILTIN_PACKSSWB512,
30346 IX86_BUILTIN_PACKUSWB512,
30347 IX86_BUILTIN_PSRAVV32HI,
30348 IX86_BUILTIN_PMADDUBSW512_MASK,
30349 IX86_BUILTIN_PMADDWD512_MASK,
30350 IX86_BUILTIN_PSRLVV32HI,
30351 IX86_BUILTIN_PUNPCKHBW512,
30352 IX86_BUILTIN_PUNPCKHWD512,
30353 IX86_BUILTIN_PUNPCKLBW512,
30354 IX86_BUILTIN_PUNPCKLWD512,
30355 IX86_BUILTIN_PSHUFB512,
30356 IX86_BUILTIN_PSHUFHW512,
30357 IX86_BUILTIN_PSHUFLW512,
30358 IX86_BUILTIN_PSRAWI512,
30359 IX86_BUILTIN_PSRAW512,
30360 IX86_BUILTIN_PSRLWI512,
30361 IX86_BUILTIN_PSRLW512,
30362 IX86_BUILTIN_CVTB2MASK512,
30363 IX86_BUILTIN_CVTW2MASK512,
30364 IX86_BUILTIN_CVTMASK2B512,
30365 IX86_BUILTIN_CVTMASK2W512,
30366 IX86_BUILTIN_PCMPEQB512_MASK,
30367 IX86_BUILTIN_PCMPEQW512_MASK,
30368 IX86_BUILTIN_PCMPGTB512_MASK,
30369 IX86_BUILTIN_PCMPGTW512_MASK,
30370 IX86_BUILTIN_PTESTMB512,
30371 IX86_BUILTIN_PTESTMW512,
30372 IX86_BUILTIN_PTESTNMB512,
30373 IX86_BUILTIN_PTESTNMW512,
30374 IX86_BUILTIN_PSLLVV32HI,
30375 IX86_BUILTIN_PABSB512,
30376 IX86_BUILTIN_PABSW512,
30377 IX86_BUILTIN_BLENDMW512,
30378 IX86_BUILTIN_BLENDMB512,
30379 IX86_BUILTIN_CMPB512,
30380 IX86_BUILTIN_CMPW512,
30381 IX86_BUILTIN_UCMPB512,
30382 IX86_BUILTIN_UCMPW512,
30384 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30385 where all operands are 32-byte or 64-byte wide respectively. */
30386 IX86_BUILTIN_GATHERALTSIV4DF,
30387 IX86_BUILTIN_GATHERALTDIV8SF,
30388 IX86_BUILTIN_GATHERALTSIV4DI,
30389 IX86_BUILTIN_GATHERALTDIV8SI,
30390 IX86_BUILTIN_GATHER3ALTDIV16SF,
30391 IX86_BUILTIN_GATHER3ALTDIV16SI,
30392 IX86_BUILTIN_GATHER3ALTSIV4DF,
30393 IX86_BUILTIN_GATHER3ALTDIV8SF,
30394 IX86_BUILTIN_GATHER3ALTSIV4DI,
30395 IX86_BUILTIN_GATHER3ALTDIV8SI,
30396 IX86_BUILTIN_GATHER3ALTSIV8DF,
30397 IX86_BUILTIN_GATHER3ALTSIV8DI,
30398 IX86_BUILTIN_GATHER3DIV16SF,
30399 IX86_BUILTIN_GATHER3DIV16SI,
30400 IX86_BUILTIN_GATHER3DIV8DF,
30401 IX86_BUILTIN_GATHER3DIV8DI,
30402 IX86_BUILTIN_GATHER3SIV16SF,
30403 IX86_BUILTIN_GATHER3SIV16SI,
30404 IX86_BUILTIN_GATHER3SIV8DF,
30405 IX86_BUILTIN_GATHER3SIV8DI,
30406 IX86_BUILTIN_SCATTERDIV16SF,
30407 IX86_BUILTIN_SCATTERDIV16SI,
30408 IX86_BUILTIN_SCATTERDIV8DF,
30409 IX86_BUILTIN_SCATTERDIV8DI,
30410 IX86_BUILTIN_SCATTERSIV16SF,
30411 IX86_BUILTIN_SCATTERSIV16SI,
30412 IX86_BUILTIN_SCATTERSIV8DF,
30413 IX86_BUILTIN_SCATTERSIV8DI,
30415 /* AVX512PF */
30416 IX86_BUILTIN_GATHERPFQPD,
30417 IX86_BUILTIN_GATHERPFDPS,
30418 IX86_BUILTIN_GATHERPFDPD,
30419 IX86_BUILTIN_GATHERPFQPS,
30420 IX86_BUILTIN_SCATTERPFDPD,
30421 IX86_BUILTIN_SCATTERPFDPS,
30422 IX86_BUILTIN_SCATTERPFQPD,
30423 IX86_BUILTIN_SCATTERPFQPS,
30425 /* AVX-512ER */
30426 IX86_BUILTIN_EXP2PD_MASK,
30427 IX86_BUILTIN_EXP2PS_MASK,
30428 IX86_BUILTIN_EXP2PS,
30429 IX86_BUILTIN_RCP28PD,
30430 IX86_BUILTIN_RCP28PS,
30431 IX86_BUILTIN_RCP28SD,
30432 IX86_BUILTIN_RCP28SS,
30433 IX86_BUILTIN_RSQRT28PD,
30434 IX86_BUILTIN_RSQRT28PS,
30435 IX86_BUILTIN_RSQRT28SD,
30436 IX86_BUILTIN_RSQRT28SS,
30438 /* AVX-512IFMA */
30439 IX86_BUILTIN_VPMADD52LUQ512,
30440 IX86_BUILTIN_VPMADD52HUQ512,
30441 IX86_BUILTIN_VPMADD52LUQ256,
30442 IX86_BUILTIN_VPMADD52HUQ256,
30443 IX86_BUILTIN_VPMADD52LUQ128,
30444 IX86_BUILTIN_VPMADD52HUQ128,
30445 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30446 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30447 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30448 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30449 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30450 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30452 /* AVX-512VBMI */
30453 IX86_BUILTIN_VPMULTISHIFTQB512,
30454 IX86_BUILTIN_VPMULTISHIFTQB256,
30455 IX86_BUILTIN_VPMULTISHIFTQB128,
30456 IX86_BUILTIN_VPERMVARQI512_MASK,
30457 IX86_BUILTIN_VPERMT2VARQI512,
30458 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30459 IX86_BUILTIN_VPERMI2VARQI512,
30460 IX86_BUILTIN_VPERMVARQI256_MASK,
30461 IX86_BUILTIN_VPERMVARQI128_MASK,
30462 IX86_BUILTIN_VPERMT2VARQI256,
30463 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30464 IX86_BUILTIN_VPERMT2VARQI128,
30465 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30466 IX86_BUILTIN_VPERMI2VARQI256,
30467 IX86_BUILTIN_VPERMI2VARQI128,
30469 /* SHA builtins. */
30470 IX86_BUILTIN_SHA1MSG1,
30471 IX86_BUILTIN_SHA1MSG2,
30472 IX86_BUILTIN_SHA1NEXTE,
30473 IX86_BUILTIN_SHA1RNDS4,
30474 IX86_BUILTIN_SHA256MSG1,
30475 IX86_BUILTIN_SHA256MSG2,
30476 IX86_BUILTIN_SHA256RNDS2,
30478 /* CLWB instructions. */
30479 IX86_BUILTIN_CLWB,
30481 /* PCOMMIT instructions. */
30482 IX86_BUILTIN_PCOMMIT,
30484 /* CLFLUSHOPT instructions. */
30485 IX86_BUILTIN_CLFLUSHOPT,
30487 /* TFmode support builtins. */
30488 IX86_BUILTIN_INFQ,
30489 IX86_BUILTIN_HUGE_VALQ,
30490 IX86_BUILTIN_FABSQ,
30491 IX86_BUILTIN_COPYSIGNQ,
30493 /* Vectorizer support builtins. */
30494 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30495 IX86_BUILTIN_CPYSGNPS,
30496 IX86_BUILTIN_CPYSGNPD,
30497 IX86_BUILTIN_CPYSGNPS256,
30498 IX86_BUILTIN_CPYSGNPS512,
30499 IX86_BUILTIN_CPYSGNPD256,
30500 IX86_BUILTIN_CPYSGNPD512,
30501 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30502 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30505 /* FMA4 instructions. */
30506 IX86_BUILTIN_VFMADDSS,
30507 IX86_BUILTIN_VFMADDSD,
30508 IX86_BUILTIN_VFMADDPS,
30509 IX86_BUILTIN_VFMADDPD,
30510 IX86_BUILTIN_VFMADDPS256,
30511 IX86_BUILTIN_VFMADDPD256,
30512 IX86_BUILTIN_VFMADDSUBPS,
30513 IX86_BUILTIN_VFMADDSUBPD,
30514 IX86_BUILTIN_VFMADDSUBPS256,
30515 IX86_BUILTIN_VFMADDSUBPD256,
30517 /* FMA3 instructions. */
30518 IX86_BUILTIN_VFMADDSS3,
30519 IX86_BUILTIN_VFMADDSD3,
30521 /* XOP instructions. */
30522 IX86_BUILTIN_VPCMOV,
30523 IX86_BUILTIN_VPCMOV_V2DI,
30524 IX86_BUILTIN_VPCMOV_V4SI,
30525 IX86_BUILTIN_VPCMOV_V8HI,
30526 IX86_BUILTIN_VPCMOV_V16QI,
30527 IX86_BUILTIN_VPCMOV_V4SF,
30528 IX86_BUILTIN_VPCMOV_V2DF,
30529 IX86_BUILTIN_VPCMOV256,
30530 IX86_BUILTIN_VPCMOV_V4DI256,
30531 IX86_BUILTIN_VPCMOV_V8SI256,
30532 IX86_BUILTIN_VPCMOV_V16HI256,
30533 IX86_BUILTIN_VPCMOV_V32QI256,
30534 IX86_BUILTIN_VPCMOV_V8SF256,
30535 IX86_BUILTIN_VPCMOV_V4DF256,
30537 IX86_BUILTIN_VPPERM,
30539 IX86_BUILTIN_VPMACSSWW,
30540 IX86_BUILTIN_VPMACSWW,
30541 IX86_BUILTIN_VPMACSSWD,
30542 IX86_BUILTIN_VPMACSWD,
30543 IX86_BUILTIN_VPMACSSDD,
30544 IX86_BUILTIN_VPMACSDD,
30545 IX86_BUILTIN_VPMACSSDQL,
30546 IX86_BUILTIN_VPMACSSDQH,
30547 IX86_BUILTIN_VPMACSDQL,
30548 IX86_BUILTIN_VPMACSDQH,
30549 IX86_BUILTIN_VPMADCSSWD,
30550 IX86_BUILTIN_VPMADCSWD,
30552 IX86_BUILTIN_VPHADDBW,
30553 IX86_BUILTIN_VPHADDBD,
30554 IX86_BUILTIN_VPHADDBQ,
30555 IX86_BUILTIN_VPHADDWD,
30556 IX86_BUILTIN_VPHADDWQ,
30557 IX86_BUILTIN_VPHADDDQ,
30558 IX86_BUILTIN_VPHADDUBW,
30559 IX86_BUILTIN_VPHADDUBD,
30560 IX86_BUILTIN_VPHADDUBQ,
30561 IX86_BUILTIN_VPHADDUWD,
30562 IX86_BUILTIN_VPHADDUWQ,
30563 IX86_BUILTIN_VPHADDUDQ,
30564 IX86_BUILTIN_VPHSUBBW,
30565 IX86_BUILTIN_VPHSUBWD,
30566 IX86_BUILTIN_VPHSUBDQ,
30568 IX86_BUILTIN_VPROTB,
30569 IX86_BUILTIN_VPROTW,
30570 IX86_BUILTIN_VPROTD,
30571 IX86_BUILTIN_VPROTQ,
30572 IX86_BUILTIN_VPROTB_IMM,
30573 IX86_BUILTIN_VPROTW_IMM,
30574 IX86_BUILTIN_VPROTD_IMM,
30575 IX86_BUILTIN_VPROTQ_IMM,
30577 IX86_BUILTIN_VPSHLB,
30578 IX86_BUILTIN_VPSHLW,
30579 IX86_BUILTIN_VPSHLD,
30580 IX86_BUILTIN_VPSHLQ,
30581 IX86_BUILTIN_VPSHAB,
30582 IX86_BUILTIN_VPSHAW,
30583 IX86_BUILTIN_VPSHAD,
30584 IX86_BUILTIN_VPSHAQ,
30586 IX86_BUILTIN_VFRCZSS,
30587 IX86_BUILTIN_VFRCZSD,
30588 IX86_BUILTIN_VFRCZPS,
30589 IX86_BUILTIN_VFRCZPD,
30590 IX86_BUILTIN_VFRCZPS256,
30591 IX86_BUILTIN_VFRCZPD256,
30593 IX86_BUILTIN_VPCOMEQUB,
30594 IX86_BUILTIN_VPCOMNEUB,
30595 IX86_BUILTIN_VPCOMLTUB,
30596 IX86_BUILTIN_VPCOMLEUB,
30597 IX86_BUILTIN_VPCOMGTUB,
30598 IX86_BUILTIN_VPCOMGEUB,
30599 IX86_BUILTIN_VPCOMFALSEUB,
30600 IX86_BUILTIN_VPCOMTRUEUB,
30602 IX86_BUILTIN_VPCOMEQUW,
30603 IX86_BUILTIN_VPCOMNEUW,
30604 IX86_BUILTIN_VPCOMLTUW,
30605 IX86_BUILTIN_VPCOMLEUW,
30606 IX86_BUILTIN_VPCOMGTUW,
30607 IX86_BUILTIN_VPCOMGEUW,
30608 IX86_BUILTIN_VPCOMFALSEUW,
30609 IX86_BUILTIN_VPCOMTRUEUW,
30611 IX86_BUILTIN_VPCOMEQUD,
30612 IX86_BUILTIN_VPCOMNEUD,
30613 IX86_BUILTIN_VPCOMLTUD,
30614 IX86_BUILTIN_VPCOMLEUD,
30615 IX86_BUILTIN_VPCOMGTUD,
30616 IX86_BUILTIN_VPCOMGEUD,
30617 IX86_BUILTIN_VPCOMFALSEUD,
30618 IX86_BUILTIN_VPCOMTRUEUD,
30620 IX86_BUILTIN_VPCOMEQUQ,
30621 IX86_BUILTIN_VPCOMNEUQ,
30622 IX86_BUILTIN_VPCOMLTUQ,
30623 IX86_BUILTIN_VPCOMLEUQ,
30624 IX86_BUILTIN_VPCOMGTUQ,
30625 IX86_BUILTIN_VPCOMGEUQ,
30626 IX86_BUILTIN_VPCOMFALSEUQ,
30627 IX86_BUILTIN_VPCOMTRUEUQ,
30629 IX86_BUILTIN_VPCOMEQB,
30630 IX86_BUILTIN_VPCOMNEB,
30631 IX86_BUILTIN_VPCOMLTB,
30632 IX86_BUILTIN_VPCOMLEB,
30633 IX86_BUILTIN_VPCOMGTB,
30634 IX86_BUILTIN_VPCOMGEB,
30635 IX86_BUILTIN_VPCOMFALSEB,
30636 IX86_BUILTIN_VPCOMTRUEB,
30638 IX86_BUILTIN_VPCOMEQW,
30639 IX86_BUILTIN_VPCOMNEW,
30640 IX86_BUILTIN_VPCOMLTW,
30641 IX86_BUILTIN_VPCOMLEW,
30642 IX86_BUILTIN_VPCOMGTW,
30643 IX86_BUILTIN_VPCOMGEW,
30644 IX86_BUILTIN_VPCOMFALSEW,
30645 IX86_BUILTIN_VPCOMTRUEW,
30647 IX86_BUILTIN_VPCOMEQD,
30648 IX86_BUILTIN_VPCOMNED,
30649 IX86_BUILTIN_VPCOMLTD,
30650 IX86_BUILTIN_VPCOMLED,
30651 IX86_BUILTIN_VPCOMGTD,
30652 IX86_BUILTIN_VPCOMGED,
30653 IX86_BUILTIN_VPCOMFALSED,
30654 IX86_BUILTIN_VPCOMTRUED,
30656 IX86_BUILTIN_VPCOMEQQ,
30657 IX86_BUILTIN_VPCOMNEQ,
30658 IX86_BUILTIN_VPCOMLTQ,
30659 IX86_BUILTIN_VPCOMLEQ,
30660 IX86_BUILTIN_VPCOMGTQ,
30661 IX86_BUILTIN_VPCOMGEQ,
30662 IX86_BUILTIN_VPCOMFALSEQ,
30663 IX86_BUILTIN_VPCOMTRUEQ,
30665 /* LWP instructions. */
30666 IX86_BUILTIN_LLWPCB,
30667 IX86_BUILTIN_SLWPCB,
30668 IX86_BUILTIN_LWPVAL32,
30669 IX86_BUILTIN_LWPVAL64,
30670 IX86_BUILTIN_LWPINS32,
30671 IX86_BUILTIN_LWPINS64,
30673 IX86_BUILTIN_CLZS,
30675 /* RTM */
30676 IX86_BUILTIN_XBEGIN,
30677 IX86_BUILTIN_XEND,
30678 IX86_BUILTIN_XABORT,
30679 IX86_BUILTIN_XTEST,
30681 /* MPX */
30682 IX86_BUILTIN_BNDMK,
30683 IX86_BUILTIN_BNDSTX,
30684 IX86_BUILTIN_BNDLDX,
30685 IX86_BUILTIN_BNDCL,
30686 IX86_BUILTIN_BNDCU,
30687 IX86_BUILTIN_BNDRET,
30688 IX86_BUILTIN_BNDNARROW,
30689 IX86_BUILTIN_BNDINT,
30690 IX86_BUILTIN_SIZEOF,
30691 IX86_BUILTIN_BNDLOWER,
30692 IX86_BUILTIN_BNDUPPER,
30694 /* BMI instructions. */
30695 IX86_BUILTIN_BEXTR32,
30696 IX86_BUILTIN_BEXTR64,
30697 IX86_BUILTIN_CTZS,
30699 /* TBM instructions. */
30700 IX86_BUILTIN_BEXTRI32,
30701 IX86_BUILTIN_BEXTRI64,
30703 /* BMI2 instructions. */
30704 IX86_BUILTIN_BZHI32,
30705 IX86_BUILTIN_BZHI64,
30706 IX86_BUILTIN_PDEP32,
30707 IX86_BUILTIN_PDEP64,
30708 IX86_BUILTIN_PEXT32,
30709 IX86_BUILTIN_PEXT64,
30711 /* ADX instructions. */
30712 IX86_BUILTIN_ADDCARRYX32,
30713 IX86_BUILTIN_ADDCARRYX64,
30715 /* SBB instructions. */
30716 IX86_BUILTIN_SBB32,
30717 IX86_BUILTIN_SBB64,
30719 /* FSGSBASE instructions. */
30720 IX86_BUILTIN_RDFSBASE32,
30721 IX86_BUILTIN_RDFSBASE64,
30722 IX86_BUILTIN_RDGSBASE32,
30723 IX86_BUILTIN_RDGSBASE64,
30724 IX86_BUILTIN_WRFSBASE32,
30725 IX86_BUILTIN_WRFSBASE64,
30726 IX86_BUILTIN_WRGSBASE32,
30727 IX86_BUILTIN_WRGSBASE64,
30729 /* RDRND instructions. */
30730 IX86_BUILTIN_RDRAND16_STEP,
30731 IX86_BUILTIN_RDRAND32_STEP,
30732 IX86_BUILTIN_RDRAND64_STEP,
30734 /* RDSEED instructions. */
30735 IX86_BUILTIN_RDSEED16_STEP,
30736 IX86_BUILTIN_RDSEED32_STEP,
30737 IX86_BUILTIN_RDSEED64_STEP,
30739 /* F16C instructions. */
30740 IX86_BUILTIN_CVTPH2PS,
30741 IX86_BUILTIN_CVTPH2PS256,
30742 IX86_BUILTIN_CVTPS2PH,
30743 IX86_BUILTIN_CVTPS2PH256,
30745 /* MONITORX and MWAITX instructions. */
30746 IX86_BUILTIN_MONITORX,
30747 IX86_BUILTIN_MWAITX,
30749 /* CFString built-in for Darwin. */
30750 IX86_BUILTIN_CFSTRING,
30752 /* Builtins to get CPU type and supported features. */
30753 IX86_BUILTIN_CPU_INIT,
30754 IX86_BUILTIN_CPU_IS,
30755 IX86_BUILTIN_CPU_SUPPORTS,
30757 /* Read/write FLAGS register built-ins. */
30758 IX86_BUILTIN_READ_FLAGS,
30759 IX86_BUILTIN_WRITE_FLAGS,
30761 IX86_BUILTIN_MAX
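The IX86_BUILTIN_CPU_INIT/IS/SUPPORTS entries near the end of the enum back GCC's documented CPU-detection builtins. A minimal, self-contained usage sketch (plain C on any x86 GCC; the vendor and feature strings follow the GCC manual):

#include <stdio.h>

int
main (void)
{
  __builtin_cpu_init ();                 /* populate the CPU feature cache */
  if (__builtin_cpu_is ("intel"))
    printf ("running on an Intel CPU\n");
  if (__builtin_cpu_supports ("avx2"))
    printf ("AVX2 is available\n");
  return 0;
}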
30764 /* Table for the ix86 builtin decls. */
30765 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30767 /* Table of all of the builtin functions that are possible with different ISAs
30768 but are waiting to be built until a function is declared to use that
30769 ISA. */
30770 struct builtin_isa {
30771 const char *name; /* function name */
30772 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30773 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30774 bool const_p; /* true if the declaration is constant */
30775 bool leaf_p; /* true if the declaration has leaf attribute */
30776 bool nothrow_p; /* true if the declaration has nothrow attribute */
30777 bool set_and_not_built_p;
30780 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30782 /* Bits that can still enable any inclusion of a builtin. */
30783 static HOST_WIDE_INT deferred_isa_values = 0;
30785 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
30786 of isa_flags to use in the ix86_builtins_isa array. Store the
30787 function decl in the ix86_builtins array. Return the function decl, or
30788 NULL_TREE if the builtin was not added.
30790 If the front end has a special hook for builtin functions, delay adding
30791 builtin functions that aren't in the current ISA until the ISA is changed
30792 with function specific optimization. Doing so can save about 300K for the
30793 default compiler. When the builtin is expanded, check at that time whether
30794 it is valid.
30796 If the front end doesn't have a special hook, record all builtins, even
30797 those that aren't in the current ISA, in case the user uses function
30798 specific options for a different ISA, so that we don't get scope
30799 errors if a builtin is added in the middle of a function scope. */
30801 static inline tree
30802 def_builtin (HOST_WIDE_INT mask, const char *name,
30803 enum ix86_builtin_func_type tcode,
30804 enum ix86_builtins code)
30806 tree decl = NULL_TREE;
30808 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30810 ix86_builtins_isa[(int) code].isa = mask;
30812 mask &= ~OPTION_MASK_ISA_64BIT;
30813 if (mask == 0
30814 || (mask & ix86_isa_flags) != 0
30815 || (lang_hooks.builtin_function
30816 == lang_hooks.builtin_function_ext_scope))
30819 tree type = ix86_get_builtin_func_type (tcode);
30820 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30821 NULL, NULL_TREE);
30822 ix86_builtins[(int) code] = decl;
30823 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30825 else
30827 /* Only a MASK where set_and_not_built_p == true can potentially
30828 still cause a builtin to be included. */
30829 deferred_isa_values |= mask;
30830 ix86_builtins[(int) code] = NULL_TREE;
30831 ix86_builtins_isa[(int) code].tcode = tcode;
30832 ix86_builtins_isa[(int) code].name = name;
30833 ix86_builtins_isa[(int) code].leaf_p = false;
30834 ix86_builtins_isa[(int) code].nothrow_p = false;
30835 ix86_builtins_isa[(int) code].const_p = false;
30836 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30840 return decl;
30843 /* Like def_builtin, but also marks the function decl "const". */
30845 static inline tree
30846 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30847 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30849 tree decl = def_builtin (mask, name, tcode, code);
30850 if (decl)
30851 TREE_READONLY (decl) = 1;
30852 else
30853 ix86_builtins_isa[(int) code].const_p = true;
30855 return decl;
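A builtin registered through def_builtin_const gets TREE_READONLY set, which tells the optimizers that a call depends only on its arguments. As a hedged, user-visible illustration (assuming __builtin_fabsq, backing IX86_BUILTIN_FABSQ above, is registered read-only, as the TFmode entries suggest), GCC at -O2 may fold the repeated call below into a single one:

/* x86 GCC provides __float128 and __builtin_fabsq; compile with -O2.  */
__float128
twice_abs (__float128 x)
{
  /* Both calls take the same argument; a read-only ("const") builtin decl
     lets the compiler CSE them into one fabs.  */
  return __builtin_fabsq (x) + __builtin_fabsq (x);
}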
30858 /* Add any new builtin functions for a given ISA that may not have been
30859 declared. This saves a bit of space compared to adding all of the
30860 declarations to the tree, even if we didn't use them. */
30862 static void
30863 ix86_add_new_builtins (HOST_WIDE_INT isa)
30865 if ((isa & deferred_isa_values) == 0)
30866 return;
30868 /* Bits in ISA value can be removed from potential isa values. */
30869 deferred_isa_values &= ~isa;
30871 int i;
30872 tree saved_current_target_pragma = current_target_pragma;
30873 current_target_pragma = NULL_TREE;
30875 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30877 if ((ix86_builtins_isa[i].isa & isa) != 0
30878 && ix86_builtins_isa[i].set_and_not_built_p)
30880 tree decl, type;
30882 /* Don't define the builtin again. */
30883 ix86_builtins_isa[i].set_and_not_built_p = false;
30885 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30886 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30887 type, i, BUILT_IN_MD, NULL,
30888 NULL_TREE);
30890 ix86_builtins[i] = decl;
30891 if (ix86_builtins_isa[i].const_p)
30892 TREE_READONLY (decl) = 1;
30893 if (ix86_builtins_isa[i].leaf_p)
30894 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30895 NULL_TREE);
30896 if (ix86_builtins_isa[i].nothrow_p)
30897 TREE_NOTHROW (decl) = 1;
30901 current_target_pragma = saved_current_target_pragma;
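A user-level sketch of the situation this deferral machinery serves: builtins for an ISA are only materialized once some function opts into that ISA, typically via the target attribute or pragma. The example below should compile even without -mavx2 on the command line; declaring the function with target("avx2") is the kind of ISA change that ends up adding the deferred builtins (assumption: a GCC new enough, 4.9 or later, to allow intrinsics under the target attribute):

#include <immintrin.h>

__attribute__ ((target ("avx2")))
__m256i
add8 (__m256i a, __m256i b)
{
  /* _mm256_add_epi32 expands to an AVX2 builtin that is declared lazily
     once the avx2 ISA bits become active for this function.  */
  return _mm256_add_epi32 (a, b);
}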
30904 /* Bits for builtin_description.flag. */
30906 /* Set when we don't support the comparison natively, and should
30907 swap the comparison operands in order to support it. */
30908 #define BUILTIN_DESC_SWAP_OPERANDS 1
30910 struct builtin_description
30912 const HOST_WIDE_INT mask;
30913 const enum insn_code icode;
30914 const char *const name;
30915 const enum ix86_builtins code;
30916 const enum rtx_code comparison;
30917 const int flag;
30920 static const struct builtin_description bdesc_comi[] =
30922 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30923 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30924 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30925 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30926 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30927 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30928 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30929 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30930 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30931 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30932 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30934 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30935 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30936 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30937 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30938 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30939 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
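User code reaches the bdesc_comi builtins through the SSE/SSE2 intrinsics in <xmmintrin.h> and <emmintrin.h>. A small sketch (compile with -msse2); _mm_comilt_sd expands to __builtin_ia32_comisdlt from the table above:

#include <emmintrin.h>

/* Nonzero when the low double of A compares less than the low double of B.  */
int
low_less (__m128d a, __m128d b)
{
  return _mm_comilt_sd (a, b);
}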
30948 static const struct builtin_description bdesc_pcmpestr[] =
30950 /* SSE4.2 */
30951 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30952 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30953 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30954 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30955 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30956 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30957 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30960 static const struct builtin_description bdesc_pcmpistr[] =
30962 /* SSE4.2 */
30963 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30964 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30965 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30966 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30967 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30968 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
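The pcmpistr builtins back the SSE4.2 string intrinsics in <nmmintrin.h>. An illustrative sketch (compile with -msse4.2); _mm_cmpistri expands to __builtin_ia32_pcmpistri128 from the table above:

#include <nmmintrin.h>

/* Index of the first byte equal to C within the 16 bytes at S (bytes after
   a terminating NUL are ignored); returns 16 when there is no match.  */
int
find_byte (const char *s, char c)
{
  __m128i set = _mm_cvtsi32_si128 ((unsigned char) c);   /* one-byte set {c} */
  __m128i hay = _mm_loadu_si128 ((const __m128i *) s);
  return _mm_cmpistri (set, hay,
                       _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY
                       | _SIDD_LEAST_SIGNIFICANT);
}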
30972 /* Special builtins with variable number of arguments. */
30973 static const struct builtin_description bdesc_special_args[] =
30975 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30976 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30977 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30979 /* 80387 (for use internally for atomic compound assignment). */
30980 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30981 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30982 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30983 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30985 /* MMX */
30986 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30988 /* 3DNow! */
30989 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30991 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30992 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30993 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30994 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30995 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30996 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30997 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30998 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30999 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31001 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
31002 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
31003 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31004 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31005 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31006 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31007 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31008 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31010 /* SSE */
31011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31012 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31013 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31020 /* SSE or 3DNow!A */
31021 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31022 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
31024 /* SSE2 */
31025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
31029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
31031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
31032 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
31033 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
31034 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31039 /* SSE3 */
31040 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31042 /* SSE4.1 */
31043 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
31045 /* SSE4A */
31046 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31047 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31049 /* AVX */
31050 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
31051 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
31053 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31054 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31055 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31056 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
31057 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
31059 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31063 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31064 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
31065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
31068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
31072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
31073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
31074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
31075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
31076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
31077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
31078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
31080 /* AVX2 */
31081 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
31082 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
31083 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
31084 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
31085 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
31086 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
31087 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
31088 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
31089 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
31091 /* AVX512F */
31092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
31093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
31094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
31095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
31096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
31097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
31098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
31099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
31100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
31101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
31102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
31103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
31104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
31105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
31106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
31107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
31108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI },
31109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI },
31110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI },
31111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI },
31112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
31113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
31114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
31115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
31116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
31117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
31118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
31119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
31120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
31121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI },
31122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
31123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
31124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI },
31125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
31126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
31127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI },
31128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
31129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
31130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_UQI },
31131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
31132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
31133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI },
31134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
31135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI },
31136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI },
31137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI },
31138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI },
31140 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
31141 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
31142 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
31143 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
31144 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
31145 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
31147 /* FSGSBASE */
31148 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31149 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31150 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31151 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31152 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31153 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31154 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31155 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31157 /* RTM */
31158 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31159 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
31160 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
31162 /* AVX512BW */
31163 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI },
31164 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI },
31165 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI },
31166 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI },
31168 /* AVX512VL */
31169 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI },
31170 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI },
31171 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI },
31172 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI },
31173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
31174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
31175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
31176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
31177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
31178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
31179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
31180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
31181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
31182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
31183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
31184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
31185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
31186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
31187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
31188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
31189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
31190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
31191 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
31192 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
31193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
31194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
31195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
31196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
31197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
31198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
31199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
31200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
31201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
31202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
31203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
31204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
31205 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI },
31206 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI },
31207 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI },
31208 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI },
31209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_UQI },
31210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_UQI },
31211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_UQI },
31212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_UQI },
31213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_UQI },
31214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI },
31215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI },
31216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI },
31217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
31218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
31219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
31220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
31221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
31222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
31223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
31224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
31225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI },
31226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI },
31227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI },
31228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI },
31229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI },
31230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI },
31231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI },
31232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI },
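/* The *mem_mask entries below are masked truncating stores
   (VPMOVQD/VPMOVDB/... with a memory destination).  The icode prefix
   selects the conversion: truncatev* truncates, ss_truncatev* saturates
   as signed (VPMOVS*), us_truncatev* saturates as unsigned (VPMOVUS*).  */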
31233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
31234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
31235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
31236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
31237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_UQI },
31238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_UQI },
31239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
31240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
31241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
31242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
31243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_UQI },
31244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_UQI },
31245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
31246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
31247 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
31248 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
31249 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_UQI },
31250 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_UQI },
31251 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
31252 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
31253 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
31254 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
31255 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_UQI },
31256 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_UQI },
31257 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
31258 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
31259 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
31260 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
31261 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI },
31262 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI },
31264 /* PCOMMIT. */
31265 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31266 };
31268 /* Builtins with variable number of arguments.  */
31269 static const struct builtin_description bdesc_args[] =
31270 {
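/* Each initializer below is a struct builtin_description:
   { ISA option mask, insn code, builtin name, ix86_builtins enum value,
     rtx comparison code (UNKNOWN when unused), function type }.
   The function type is an ix86_builtin_func_type enumerator cast to int;
   these entries are (mostly) expanded through ix86_expand_args_builtin.  */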
31271 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31272 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31273 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31274 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31275 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31276 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31277 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31279 /* MMX */
31280 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31281 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31282 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31283 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31284 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31285 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31287 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31288 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31289 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31290 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31291 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31292 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31294 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31296 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31297 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31299 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31300 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31301 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31305 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31306 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31308 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31314 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31322 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
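/* For the shift builtins the _COUNT suffix marks the last operand as a
   shift count: the *_SI_COUNT forms take it as a scalar (immediate or
   register), the *_V4HI_COUNT / *_V2SI_COUNT / *_V1DI_COUNT forms take
   it in an MMX register, matching the two operand forms of
   psllw/pslld/psllq and friends.  */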
31324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31326 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31327 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31328 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31329 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31331 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31332 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31333 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31334 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31341 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31343 /* 3DNow! */
31344 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31345 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31346 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31347 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31349 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31350 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31351 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31352 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31353 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31354 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31355 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31356 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31357 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31358 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31359 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31360 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31361 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31362 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31363 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31365 /* 3DNow!A */
31366 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31367 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31368 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31369 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31370 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31371 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31373 /* SSE */
31374 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31375 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31376 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31378 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31379 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31380 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31381 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31382 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31383 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31384 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31385 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31387 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31389 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31390 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31391 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31392 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31393 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31394 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31395 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31396 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
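/* CMPPS/CMPSS encode only the eq/lt/le/unord/neq/nlt/nle/ord predicates,
   so the gt/ge (and ngt/nge) builtins below reuse the lt/le (resp.
   unge/ungt) comparison codes together with a _SWAP function type, which
   swaps the two operands at expansion time.  */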
31398 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31400 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31401 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31402 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31404 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31407 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31408 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31409 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31410 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31412 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31413 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31414 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31419 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31420 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31424 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31425 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31426 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31427 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31429 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31431 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31433 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31434 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31435 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31437 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31439 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31441 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
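/* _VEC_MERGE: single-operand scalar operation.  The operation is applied
   to element 0 only; the remaining elements of the result are copied
   unchanged from the source operand (sqrtss/rsqrtss/rcpss semantics).  */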
31443 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31444 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31445 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31447 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31448 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31450 /* SSE MMX or 3DNow!A */
31451 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31452 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31453 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31455 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31456 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31457 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31458 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31460 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31461 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31463 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31465 /* SSE2 */
31466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31472 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31484 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31485 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31489 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31491 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31492 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31493 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31494 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31510 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31522 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31526 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31528 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31529 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31531 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31534 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31535 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31537 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31539 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31540 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31541 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31542 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31543 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31544 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31545 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31546 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31557 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31558 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31560 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31562 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31563 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31575 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31576 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31577 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31580 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31581 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31582 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31583 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31584 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31585 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31586 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31587 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31593 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
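/* PMULUDQ is a widening multiply: the 128-bit form multiplies the
   even-numbered (0 and 2) unsigned 32-bit elements into two 64-bit
   products, the MMX form multiplies the low elements into one, hence the
   vec_widen_umult_even icode and the V2DI / V1DI result types.  */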
31596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31597 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31602 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
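/* For the whole-register byte shifts the _INT_CONVERT suffix means the
   V2DI operands of the builtin are re-interpreted in the insn's mode
   (V1TI for the ashl/lshr v1ti patterns) during expansion, and the shift
   count is expected to be a compile-time constant.  */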
31606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31607 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31608 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31609 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31610 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31611 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31612 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31615 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31616 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31617 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31618 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31619 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31620 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31622 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31623 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31624 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31625 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31627 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

31635 /* SSE2 MMX */
31636 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31637 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31639 /* SSE3 */
31640 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
31641 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31643 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31644 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31645 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31646 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31647 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31648 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31650 /* SSSE3 */
31651 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31652 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31653 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31654 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31655 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31656 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31658 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31659 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31660 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31661 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31662 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31663 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31664 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31665 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31666 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31667 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31668 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31669 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31670 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31671 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31672 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31673 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31674 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31675 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31676 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31677 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31678 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31679 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31680 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31681 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31683 /* SSSE3. */
31684 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31685 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31687 /* SSE4.1 */
31688 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31689 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31690 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31691 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31692 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31693 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31694 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31695 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31696 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31697 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31699 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31700 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31701 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31702 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31703 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31704 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31705 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31706 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31707 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31708 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31709 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31710 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31711 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31713 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31714 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31715 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31716 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31717 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31718 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31719 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31720 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31721 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31722 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31723 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31724 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31726 /* SSE4.1 */
31727 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31728 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31729 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31730 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
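/* In the entries below the "comparison" slot does not hold a real rtx
   comparison code; it carries the ROUND_* immediate, which the expander
   (ix86_expand_sse_round) passes through as the rounding-mode operand.  */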
31732 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31733 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31734 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31735 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31737 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31738 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
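/* The ..._az variants round halfway cases away from zero (round()
   semantics) via the roundv2df2/roundv4sf2 expanders; the vectorizer maps
   BUILT_IN_ROUND to these builtins.  */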
31740 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31741 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31743 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31744 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31745 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31746 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31748 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31749 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31751 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31752 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
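/* ptestz/ptestc/ptestnzc share one insn; the comparison field (EQ, LTU,
   GTU) tells ix86_expand_sse_ptest which flag condition to materialize as
   the integer result (ZF for ptestz, CF for ptestc, neither for ptestnzc).  */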
31754 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31755 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31756 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptestv2di, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31758 /* SSE4.2 */
31759 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31760 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31761 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31762 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31763 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31765 /* SSE4A */
31766 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31767 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31768 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31769 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31771 /* AES */
31772 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31773 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31775 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31776 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31777 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31778 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31780 /* PCLMUL */
31781 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
31783 /* AVX */
31784 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31785 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31788 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31789 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31792 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31798 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31799 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31800 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31801 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31802 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31803 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31804 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31805 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31806 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31807 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31808 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31809 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31832 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31833 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31837 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31839 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31855 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31857 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31859 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31871 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31872 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31885 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31886 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31896 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31897 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31898 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
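/* The vtest/ptest entries below follow the same EQ/LTU/GTU comparison-field
   convention as the SSE4.1 ptest builtins above.  */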
31900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptestv4di, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31919 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31920 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31922 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31924 /* AVX2 */
31925 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31926 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31927 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31928 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31931 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31933 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31934 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31935 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31936 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31942 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31964 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31965 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31966 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31967 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31968 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31969 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31970 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31971 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31972 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31973 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31974 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31975 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31976 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31977 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31978 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31979 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31980 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31981 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31982 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31983 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31984 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31985 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31986 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31987 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31988 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31989 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31990 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31991 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31992 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31993 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31994 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31995 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31996 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31997 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31998 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31999 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
32000 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
32001 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
32002 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32003 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32004 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32005 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
32006 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32007 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32008 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32009 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32010 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
32011 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
32012 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32013 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32014 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32015 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32016 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
32017 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32018 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32019 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32020 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32021 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
32022 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
32023 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32024 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32025 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32026 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32027 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32028 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32029 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32030 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32031 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32032 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32033 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32034 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32035 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32036 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32037 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32038 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32039 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32040 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32041 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
32042 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
32043 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32044 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32045 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
32046 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
32047 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
32048 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
32049 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32050 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
32051 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
32052 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
32053 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
32054 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32055 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
32056 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
32057 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
32058 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
32059 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
32060 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
32061 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32062 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32063 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32064 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32065 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32066 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32067 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32068 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32069 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32070 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32072 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
32074 /* BMI */
32075 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32076 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32077 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
32079 /* TBM */
32080 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32081 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32083 /* F16C */
32084 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
32085 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
32086 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
32087 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
32089 /* BMI2 */
32090 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32091 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32092 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32093 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32094 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32095 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32097 /* AVX512F */
32098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
32099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
32100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
32101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
32102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
32103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
32104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
32105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
32106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
32107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
32108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
32109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
32110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
32111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI },
32112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
32113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI },
32114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
32115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
32116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
32117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
32118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
32119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
32120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
32121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI },
32122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI },
32123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
32124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
32125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
32126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
32127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
32128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI },
32129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI },
32130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI },
32131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI },
32132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI },
32133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI },
32134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI },
32135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI },
32136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
32137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
32138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
32139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
32140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
32141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
32142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
32143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
32144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
32145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
32152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI },
32153 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI },
32154 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI },
32155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
32156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI },
32157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
32158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
32159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
32160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
32161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
32162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
32163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
32164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
32165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
32166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
32167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
32176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
32177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
32178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
32179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
32180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
32181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
32182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
32183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
32184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
32185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
32186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
32187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
32188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
32189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
32190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI },
32191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI },
32192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI },
32193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI },
32194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI },
32195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI },
32196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI },
32197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI },
32198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI },
32199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI },
32200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
32201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI },
32203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
32206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
32207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
32210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
32211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
32214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
32215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
32216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
32217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
32218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
32221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
32222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
32223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
32224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI },
32227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI },
32228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI },
32229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
32230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
32235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
32236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI },
32237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI },
32238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
32245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
32246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI },
32249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI },
32250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
32253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
32254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI },
32255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI },
32256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI },
32257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI },
32258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI },
32259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI },
32260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
32261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
32262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
32263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
32264 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
32265 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
32266 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI },
32267 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI },
32268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
32269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI },
32270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
32272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
32273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
32275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
32276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
32277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
32278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
32281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI },
32282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
32283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI },
32284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI },
32287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
32288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI },
32289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI },
32290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
32291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI },
32292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
32293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI },
32295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32299 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32304 /* Mask arithmetic operations */
32305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
32306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
32307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI },
32308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
32309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
32310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
32311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
32312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
32313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI },
32314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI },
32316 /* SHA */
32317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
32325 /* AVX512VL. */
32326 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT },
32327 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32336 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
32337 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
32338 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
32339 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32364 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32365 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32366 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32367 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32368 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32369 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32370 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32371 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32372 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
32373 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
32374 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32375 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
32376 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
32377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
32378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
32379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_UQI },
32380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_UQI },
32381 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI },
32382 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI },
32383 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
32384 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
32385 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
32386 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
32387 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
32388 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
32389 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_UQI },
32390 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_UQI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
32393 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
32394 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
32395 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
32396 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
32401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
32402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
32403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
32404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
32405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_UQI },
32406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_UQI },
32407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
32408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_UQI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_UQI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_UQI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_UQI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_UQI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_UQI },
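  /* Each entry below gives the ISA option mask required for the builtin,
     the insn code used to expand it, the builtin's name, its IX86_BUILTIN
     enumerator, a comparison code (UNKNOWN for these entries), and the
     enumerator describing its prototype.  */
  /* Masked element broadcasts: vpbroadcastb/w/d/q from a vector or a GPR,
     and vbroadcastss/vbroadcastsd.  */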
32417 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_USI },
32418 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_USI },
32419 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
32420 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_UHI },
32421 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_UHI },
32422 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_UHI },
32423 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
32424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_UQI },
32425 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_UQI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_UQI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_UQI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_UQI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_UQI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_UQI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_UQI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_UQI },
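  /* 128-bit lane extract/insert: vextractf64x2, vextracti64x2,
     vinsertf32x4, vinserti32x4.  */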
32436 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_UQI },
32437 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_UQI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI },
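  /* Masked sign and zero extension (vpmovsx*, vpmovzx*).  */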
32440 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
32441 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
32448 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
32449 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
32450 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
32451 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
32452 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_UHI },
32453 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_UQI },
32454 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_UQI },
32455 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_UQI },
32456 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_UQI },
32457 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_UQI },
32458 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_UQI },
32459 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_UQI },
32460 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_UQI },
32461 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_UQI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_UQI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_UQI },
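  /* vreducepd/vreduceps (packed) and vreducesd/vreducess (scalar).  */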
32464 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
32465 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
32466 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
32467 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
32468 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32469 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
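  /* Variable permutes on 16-bit elements: vpermw, vpermt2w, vpermi2w.  */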
32470 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32471 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32472 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32473 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32474 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32475 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32476 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32477 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
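  /* Approximate reciprocal (vrcp14), approximate reciprocal square root
     (vrsqrt14), and full-precision square root.  */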
32478 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32479 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32489 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
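  /* Masked integer add/subtract, plain and signed/unsigned saturating.  */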
32490 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32491 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32494 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32495 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32498 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32499 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32500 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32501 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32502 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32503 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32505 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32506 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32507 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32513 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32514 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32515 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32518 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32519 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32520 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32521 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
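  /* 128-bit lane shuffles: vshuff64x2, vshufi64x2, vshufi32x4, vshuff32x4.  */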
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
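  /* Down-converting truncations: vpmov* (plain), vpmovs* (signed
     saturating) and vpmovus* (unsigned saturating).  */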
32526 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
32527 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
32528 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
32529 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
32530 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_UQI },
32531 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_UHI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_UQI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_UQI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_UQI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_UQI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
32546 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
32547 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
32548 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_UQI },
32549 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_UQI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_UQI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_UQI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_UQI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_UQI },
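  /* vrange, vgetexp, vfixupimm and 32/64-bit vpabs builtins.  */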
32562 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
32563 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
32564 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
32565 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
32566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32570 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
32571 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI },
32572 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
32573 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI },
32574 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
32575 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI },
32576 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
32577 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI },
32578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
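  /* Multiplies: high part (vpmulhw, vpmulhuw, vpmulhrsw), low part
     (vpmullw) and 64-bit vpmullq.  */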
32582 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32583 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32584 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32585 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32586 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32587 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32588 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32589 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32590 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32591 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
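  /* Masked FP bitwise logic: vandpd/vandps and vandnpd/vandnps.  */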
32592 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32593 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32594 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32595 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32596 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32597 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32598 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32599 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
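  /* Shifts by immediate and by count, interleaved with the bitwise
     vpand/vpandn/vpor/vpxor builtins.  */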
32600 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
32603 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32606 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
32607 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_UQI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32628 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
32629 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_UQI },
32630 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32631 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
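  /* Pack with saturation, vrndscale, vpternlog and vscalef.  */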
32644 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
32645 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
32646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_USI },
32647 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_UHI },
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
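  /* Fused multiply-add family (vfmadd, vfmsub, vfnmadd, vfnmsub,
     vfmaddsub, vfmsubadd) in _mask, _mask3 and _maskz variants.  */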
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32708 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI },
32709 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI },
32710 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32711 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32712 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_UHI },
32713 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_UQI },
32714 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_UQI },
32715 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_UQI },
32716 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32717 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
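/* Single precision to signed and unsigned integer conversions. */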
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
32720 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_UQI },
32721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_UQI },
32722 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
32723 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
32724 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_UQI },
32725 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_UQI },
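/* Mantissa extraction (vgetmant). */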
32726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
32727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
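/* Duplicate-element moves (movddup, movshdup, movsldup). */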
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
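/* 64-bit integer to floating point conversions. */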
32736 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
32737 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
32738 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_UQI },
32739 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_UQI },
32740 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
32741 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
32742 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_UQI },
32743 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_UQI },
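/* Two-source variable permutes (vpermt2var, vpermi2var). */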
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_UQI },
32752 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
32753 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_UQI },
32754 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
32755 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
32756 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32757 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_UQI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
32765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_UQI },
32766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
32767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
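/* Shuffles (pshufb, pshufhw, pshuflw, pshufd, shufpd, shufps). */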
32768 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32769 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32770 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
32771 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
32772 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
32773 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
32776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI },
32777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI },
32778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI },
32779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI },
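/* Rotates and shifts. */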
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
32784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
32787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_UQI },
32788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32800 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
32801 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
32802 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
32803 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32804 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_UHI },
32805 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_UHI },
32806 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_UQI },
32807 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32808 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32809 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32810 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
32811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_UQI },
32812 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
32815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_UQI },
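/* Floating point class tests (vfpclass). */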
32816 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_UQI },
32817 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_UQI },
32818 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32819 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_UQI },
32820 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_UQI },
32821 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
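/* Vector to mask and mask to vector conversions. */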
32822 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) UHI_FTYPE_V16QI },
32823 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) USI_FTYPE_V32QI },
32824 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) UQI_FTYPE_V8HI },
32825 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) UHI_FTYPE_V16HI },
32826 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) UQI_FTYPE_V4SI },
32827 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) UQI_FTYPE_V8SI },
32828 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) UQI_FTYPE_V2DI },
32829 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) UQI_FTYPE_V4DI },
32830 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_UHI },
32831 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_USI },
32832 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_UQI },
32833 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_UHI },
32834 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_UQI },
32835 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_UQI },
32836 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_UQI },
32837 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_UQI },
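/* Integer equality and greater-than compares producing masks. */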
32838 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
32839 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
32840 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
32841 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
32842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
32843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
32844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
32845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
32846 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
32847 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
32848 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
32849 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
32850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
32851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
32852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
32853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
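/* Bitwise test builtins (vptestm, vptestnm). */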
32854 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
32855 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
32856 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
32857 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
32858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
32859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
32860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
32861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
32862 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI },
32863 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI },
32864 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_UQI },
32865 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_UHI },
32866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_UQI },
32867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_UQI },
32868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_UQI },
32869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_UQI },
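/* Mask register broadcasts. */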
32870 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_UQI },
32871 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_UQI },
32872 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_UHI },
32873 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_UHI },
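/* Compress and expand. */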
32874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
32892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
32894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
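/* Integer min/max. */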
32898 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32899 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32900 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32901 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32902 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32914 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32915 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32916 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32917 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32918 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32919 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32920 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32921 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32922 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32923 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32924 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32925 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32926 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32927 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32928 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32929 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
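/* Conflict detection, leading zero count and FP unpacks. */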
32930 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32931 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32932 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32933 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
32938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_UQI },
32939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI },
32940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_UQI },
32941 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32942 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32943 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
32944 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
32945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI },
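/* Element alignment (valignd, valignq). */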
32946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI },
32947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI },
32948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI },
32949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI },
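/* Half precision conversions (vcvtps2ph, vcvtph2ps). */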
32950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_UQI },
32951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_UQI },
32952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_UQI },
32953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_UQI },
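/* Integer interleaves (punpckh*, punpckl*). */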
32954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
32959 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
32960 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
32961 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
32962 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32963 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32964 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32965 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32966 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32967 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32968 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32969 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
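/* Word variable shifts, pack with saturation and averages. */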
32970 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32971 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
32972 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
32973 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
32974 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_UHI },
32975 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_UQI },
32976 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
32977 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI },
32978 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
32979 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI },
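/* Permutes, absolute value and vpermil. */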
32980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
32981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
32982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
32983 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
32984 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
32985 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
32986 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
32987 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_UQI },
32988 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_UQI },
32989 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_UQI },
32990 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_UQI },
32991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_UQI },
32992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_UQI },
32993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_UQI },
32994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_UQI },
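/* Blend with mask. */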
32995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI },
32996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI },
32997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI },
32998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI },
32999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI },
33000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI },
33001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI },
33002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI },
33003 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI },
33004 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI },
33005 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI },
33006 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI },
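/* Integer multiplies, pd to ps conversion and integer permutes. */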
33007 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33008 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI },
33009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
33010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_UQI },
33011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
33012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_UQI },
33013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_UQI },
33014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_UQI },
33015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI },
33016 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_UQI },
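/* Compares with immediate predicate operand. */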
33018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
33019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
33020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) UQI_FTYPE_V4DI_V4DI_INT_UQI },
33021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) UQI_FTYPE_V8SI_V8SI_INT_UQI },
33022 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
33023 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
33024 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_INT_USI },
33025 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) UHI_FTYPE_V16HI_V16HI_INT_UHI },
33026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_UQI },
33027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_UQI },
33028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
33029 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
33030 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) UQI_FTYPE_V2DI_V2DI_INT_UQI },
33031 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) UQI_FTYPE_V4SI_V4SI_INT_UQI },
33032 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
33033 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
33034 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_INT_UHI },
33035 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) UQI_FTYPE_V8HI_V8HI_INT_UQI },
33036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI },
33037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI },
33039 /* AVX512DQ. */
33040 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI },
33041 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI },
33042 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI },
33043 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI },
33044 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI },
33045 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI },
33046 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI },
33047 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI },
33048 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI },
33049 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI },
33050 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI },
33051 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI },
33052 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33053 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33054 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33055 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33056 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33057 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI },
33058 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33059 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI},
33060 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI },
33061 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI },
33062 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI },
33063 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI },
33064 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI },
33065 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI },
33066 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI },
33067 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI },
33068 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI },
33069 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI },
33070 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI },
33072 /* AVX512BW. */
33073 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI },
33074 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI },
33075 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
33076 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33077 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33078 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI },
33079 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
33080 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT },
33081 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
33082 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
33083 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
33084 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI },
33085 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI },
33086 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI },
33087 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI },
33088 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI },
33089 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
33090 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI },
33091 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33092 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33093 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33094 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33095 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33096 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33097 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33098 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33099 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33100 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33101 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33102 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33103 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33104 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33105 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33106 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33107 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33108 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33109 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33110 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33111 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33112 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33113 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33114 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33115 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33116 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33117 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
33118 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
33119 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI },
33120 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33121   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33122   { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33123 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33124 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
33125 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
33126 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
33127 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI },
33128 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33129 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI },
33130 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI },
33131 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33132 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33133 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33134 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33135 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33136 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33137 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
33138 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
33139 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
33140 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
33141 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI },
33142 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI },
33143 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI },
33144 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI },
33145 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI },
33146 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI },
33147 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
33148 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
33149 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
33150 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
33151 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
33152 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
33153 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI },
33154 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI },
33155 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI },
33156 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
33157 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
33158 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI },
33159 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI },
33160 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
33161 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
33162 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI },
33163 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI },
33165 /* AVX512IFMA */
33166 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33167 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33168 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33169 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI },
33170 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33171 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33172 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33173 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI },
33174 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33175 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33176 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33177 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI },
33179 /* AVX512VBMI */
33180 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33181 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33182 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33183 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33184 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33185 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33186 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI },
33187 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33188 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33189 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33190   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33191 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33192   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33193 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI },
33194   { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI },
33195 };
33197 /* Builtins with rounding support. */
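/* Each entry below records the ISA option mask under which the builtin is
   available, the CODE_FOR_* named insn pattern used to expand it, the
   __builtin_ia32_* name exposed to the front end, its IX86_BUILTIN_*
   enumerator, an rtx comparison code (UNKNOWN where none applies) and the
   function-type enumerator cast to int.  Builtins in this table take an
   embedded rounding/SAE immediate as their last argument.  */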
33198 static const struct builtin_description bdesc_round_args[] =
33199 {
33200 /* AVX512F */
33201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT },
33206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT },
33207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT },
33208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT },
33209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33220 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33222 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33229 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33231 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33281 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33283 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33285 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33287 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33289 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33291 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33293 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33295 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT },
33318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33321 /* AVX512ER */
33322 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33323 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33324 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33325 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33326 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33327 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33328 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33329 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33330 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33331 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33333 /* AVX512DQ. */
33334 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33335 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33336 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33337 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33338 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33339 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33340 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33341 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33342 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33343 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33344 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33345 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33346 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33347 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33348 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33349   { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33350 };
33352 /* Builtins for MPX.  */
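/* MPX builtins have no named insn pattern (the insn code is 0); they are
   expanded by dedicated code in ix86_expand_builtin rather than through the
   generic argument-expansion helpers.  */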
33353 static const struct builtin_description bdesc_mpx[] =
33354 {
33355 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33356 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33357   { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33358 };
33360 /* Const builtins for MPX. */
33361 static const struct builtin_description bdesc_mpx_const[] =
33363 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33364 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33365 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33366 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33367 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33368 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33369 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33370 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33373 /* FMA4 and XOP. */
33374 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33375 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33376 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33377 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33378 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33379 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33380 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33381 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33382 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33383 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33384 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33385 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33386 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33387 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33388 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33389 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33390 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33391 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33392 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33393 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33394 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33395 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33396 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33397 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33398 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33399 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33400 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33401 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33402 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33403 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33404 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33405 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33406 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33407 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33408 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33409 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33410 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33411 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33412 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33413 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33414 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33415 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33416 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33417 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33418 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33419 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33420 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33421 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33422 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33423 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33424 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33425 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
33427 static const struct builtin_description bdesc_multi_arg[] =
33429 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33430 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33431 UNKNOWN, (int)MULTI_ARG_3_SF },
33432 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33433 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33434 UNKNOWN, (int)MULTI_ARG_3_DF },
33436 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33437 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33438 UNKNOWN, (int)MULTI_ARG_3_SF },
33439 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33440 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33441 UNKNOWN, (int)MULTI_ARG_3_DF },
33443 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33444 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33445 UNKNOWN, (int)MULTI_ARG_3_SF },
33446 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33447 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33448 UNKNOWN, (int)MULTI_ARG_3_DF },
33449 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33450 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33451 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33452 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33453 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33454 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33456 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33457 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33458 UNKNOWN, (int)MULTI_ARG_3_SF },
33459 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33460 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33461 UNKNOWN, (int)MULTI_ARG_3_DF },
33462 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33463 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33464 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33465 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33466 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33467 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
33474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33629 /* TM vector builtins. */
33631 /* Reuse the existing x86-specific `struct builtin_description' because
33632 we're lazy. Add casts to make them fit. */
33633 static const struct builtin_description bdesc_tm[] =
33635 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33636 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33637 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33638 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33639 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33640 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33641 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33643 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33644 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33645 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33646 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33647 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33648 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33649 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33651 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33652 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33653 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33654 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33655 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33656 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33657 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33659 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33660 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33661 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33664 /* TM callbacks. */
33666 /* Return the builtin decl needed to load a vector of TYPE. */
33668 static tree
33669 ix86_builtin_tm_load (tree type)
33671 if (TREE_CODE (type) == VECTOR_TYPE)
33673 switch (tree_to_uhwi (TYPE_SIZE (type)))
33675 case 64:
33676 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33677 case 128:
33678 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33679 case 256:
33680 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33683 return NULL_TREE;
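/* Illustrative sketch only (not part of the original file): a caller of
   ix86_builtin_tm_load, such as the trans-mem lowering code, could map a
   vector memory access onto the matching TM load builtin roughly like
   this.  `vectype', `addr' and `lhs' are assumed names for the access's
   vector type, address expression and destination.

     tree fn = ix86_builtin_tm_load (vectype);
     if (fn != NULL_TREE)
       {
         gcall *load_call = gimple_build_call (fn, 1, addr);
         gimple_call_set_lhs (load_call, lhs);
       }

   A 128-bit vector type, for example, resolves to BUILT_IN_TM_LOAD_M128
   per the switch above.  */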
33686 /* Return the builtin decl needed to store a vector of TYPE. */
33688 static tree
33689 ix86_builtin_tm_store (tree type)
33691 if (TREE_CODE (type) == VECTOR_TYPE)
33693 switch (tree_to_uhwi (TYPE_SIZE (type)))
33695 case 64:
33696 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33697 case 128:
33698 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33699 case 256:
33700 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33703 return NULL_TREE;
33706 /* Initialize the transactional memory vector load/store builtins. */
33708 static void
33709 ix86_init_tm_builtins (void)
33711 enum ix86_builtin_func_type ftype;
33712 const struct builtin_description *d;
33713 size_t i;
33714 tree decl;
33715 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33716 tree attrs_log, attrs_type_log;
33718 if (!flag_tm)
33719 return;
33721 /* If there are no builtins defined, we must be compiling in a
33722 language without trans-mem support. */
33723 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33724 return;
33726 /* Use whatever attributes a normal TM load has. */
33727 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33728 attrs_load = DECL_ATTRIBUTES (decl);
33729 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33730 /* Use whatever attributes a normal TM store has. */
33731 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33732 attrs_store = DECL_ATTRIBUTES (decl);
33733 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33734 /* Use whatever attributes a normal TM log has. */
33735 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33736 attrs_log = DECL_ATTRIBUTES (decl);
33737 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33739 for (i = 0, d = bdesc_tm;
33740 i < ARRAY_SIZE (bdesc_tm);
33741 i++, d++)
33743 if ((d->mask & ix86_isa_flags) != 0
33744 || (lang_hooks.builtin_function
33745 == lang_hooks.builtin_function_ext_scope))
33747 tree type, attrs, attrs_type;
33748 enum built_in_function code = (enum built_in_function) d->code;
33750 ftype = (enum ix86_builtin_func_type) d->flag;
33751 type = ix86_get_builtin_func_type (ftype);
33753 if (BUILTIN_TM_LOAD_P (code))
33755 attrs = attrs_load;
33756 attrs_type = attrs_type_load;
33758 else if (BUILTIN_TM_STORE_P (code))
33760 attrs = attrs_store;
33761 attrs_type = attrs_type_store;
33763 else
33765 attrs = attrs_log;
33766 attrs_type = attrs_type_log;
33768 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33769 /* The builtin without the prefix for
33770 calling it directly. */
33771 d->name + strlen ("__builtin_"),
33772 attrs);
33773 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33774 set the TYPE_ATTRIBUTES. */
33775 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33777 set_builtin_decl (code, decl, false);
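/* Worked example (illustration only, not in the original source): for the
   bdesc_tm entry named "__builtin__ITM_WM64", the expression
   d->name + strlen ("__builtin_") used above points at "_ITM_WM64", so the
   name passed to add_builtin_function for calling the builtin directly is
   the unprefixed libitm-style spelling.

     const char *name = "__builtin__ITM_WM64";
     const char *direct = name + strlen ("__builtin_");
     gcc_assert (strcmp (direct, "_ITM_WM64") == 0);
*/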
33782 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33783 not in the current target ISA, to allow the user to compile particular
33784 modules with target-specific options that differ from the command-line
33785 options. */
33786 static void
33787 ix86_init_mmx_sse_builtins (void)
33789 const struct builtin_description * d;
33790 enum ix86_builtin_func_type ftype;
33791 size_t i;
33793 /* Add all special builtins with variable number of operands. */
33794 for (i = 0, d = bdesc_special_args;
33795 i < ARRAY_SIZE (bdesc_special_args);
33796 i++, d++)
33798 if (d->name == 0)
33799 continue;
33801 ftype = (enum ix86_builtin_func_type) d->flag;
33802 def_builtin (d->mask, d->name, ftype, d->code);
33805 /* Add all builtins with variable number of operands. */
33806 for (i = 0, d = bdesc_args;
33807 i < ARRAY_SIZE (bdesc_args);
33808 i++, d++)
33810 if (d->name == 0)
33811 continue;
33813 ftype = (enum ix86_builtin_func_type) d->flag;
33814 def_builtin_const (d->mask, d->name, ftype, d->code);
33817 /* Add all builtins with rounding. */
33818 for (i = 0, d = bdesc_round_args;
33819 i < ARRAY_SIZE (bdesc_round_args);
33820 i++, d++)
33822 if (d->name == 0)
33823 continue;
33825 ftype = (enum ix86_builtin_func_type) d->flag;
33826 def_builtin_const (d->mask, d->name, ftype, d->code);
33829 /* pcmpestr[im] insns. */
33830 for (i = 0, d = bdesc_pcmpestr;
33831 i < ARRAY_SIZE (bdesc_pcmpestr);
33832 i++, d++)
33834 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33835 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33836 else
33837 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33838 def_builtin_const (d->mask, d->name, ftype, d->code);
33841 /* pcmpistr[im] insns. */
33842 for (i = 0, d = bdesc_pcmpistr;
33843 i < ARRAY_SIZE (bdesc_pcmpistr);
33844 i++, d++)
33846 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33847 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33848 else
33849 ftype = INT_FTYPE_V16QI_V16QI_INT;
33850 def_builtin_const (d->mask, d->name, ftype, d->code);
33853 /* comi/ucomi insns. */
33854 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33856 if (d->mask == OPTION_MASK_ISA_SSE2)
33857 ftype = INT_FTYPE_V2DF_V2DF;
33858 else
33859 ftype = INT_FTYPE_V4SF_V4SF;
33860 def_builtin_const (d->mask, d->name, ftype, d->code);
33863 /* SSE */
33864 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33865 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33866 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33867 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33869 /* SSE or 3DNow!A */
33870 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33871 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33872 IX86_BUILTIN_MASKMOVQ);
33874 /* SSE2 */
33875 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33876 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33878 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33879 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33880 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33881 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33883 /* SSE3. */
33884 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33885 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33886 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33887 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33889 /* AES */
33890 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33891 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33892 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33893 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33894 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33895 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33896 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33897 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33898 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33899 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33900 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33901 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33903 /* PCLMUL */
33904 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33905 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33907 /* RDRND */
33908 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33909 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33910 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33911 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33912 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33913 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33914 IX86_BUILTIN_RDRAND64_STEP);
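/* Usage sketch (illustration only): the *_step builtins registered above
   follow the INT_FTYPE_P* signatures, returning a nonzero int when the
   hardware delivered a random value through the pointer argument, so a
   typical caller retries on failure.

     unsigned int r;
     while (!__builtin_ia32_rdrand32_step (&r))
       continue;
*/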
33916 /* AVX2 */
33917 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33918 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33919 IX86_BUILTIN_GATHERSIV2DF);
33921 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33922 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33923 IX86_BUILTIN_GATHERSIV4DF);
33925 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33926 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33927 IX86_BUILTIN_GATHERDIV2DF);
33929 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33930 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33931 IX86_BUILTIN_GATHERDIV4DF);
33933 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33934 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33935 IX86_BUILTIN_GATHERSIV4SF);
33937 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33938 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33939 IX86_BUILTIN_GATHERSIV8SF);
33941 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33942 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33943 IX86_BUILTIN_GATHERDIV4SF);
33945 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33946 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33947 IX86_BUILTIN_GATHERDIV8SF);
33949 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33950 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33951 IX86_BUILTIN_GATHERSIV2DI);
33953 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33954 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33955 IX86_BUILTIN_GATHERSIV4DI);
33957 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33958 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33959 IX86_BUILTIN_GATHERDIV2DI);
33961 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33962 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33963 IX86_BUILTIN_GATHERDIV4DI);
33965 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33966 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33967 IX86_BUILTIN_GATHERSIV4SI);
33969 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33970 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33971 IX86_BUILTIN_GATHERSIV8SI);
33973 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33974 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33975 IX86_BUILTIN_GATHERDIV4SI);
33977 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33978 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33979 IX86_BUILTIN_GATHERDIV8SI);
33981 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33982 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33983 IX86_BUILTIN_GATHERALTSIV4DF);
33985 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33986 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33987 IX86_BUILTIN_GATHERALTDIV8SF);
33989 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33990 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33991 IX86_BUILTIN_GATHERALTSIV4DI);
33993 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33994 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33995 IX86_BUILTIN_GATHERALTDIV8SI);
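/* Argument-order sketch (illustration only, read off the FTYPE codes
   above): for __builtin_ia32_gathersiv8sf the type
   V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT corresponds to
   (src, base, index, mask, scale), where scale is a literal 1, 2, 4 or 8.

     __v8sf g = __builtin_ia32_gathersiv8sf (src, base, index, mask, 4);

   `src', `base', `index' and `mask' are assumed variables of the vector,
   pointer and mask types named in the FTYPE.  */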
33997 /* AVX512F */
33998 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33999 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
34000 IX86_BUILTIN_GATHER3SIV16SF);
34002 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
34003 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
34004 IX86_BUILTIN_GATHER3SIV8DF);
34006 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
34007 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
34008 IX86_BUILTIN_GATHER3DIV16SF);
34010 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
34011 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
34012 IX86_BUILTIN_GATHER3DIV8DF);
34014 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
34015 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
34016 IX86_BUILTIN_GATHER3SIV16SI);
34018 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
34019 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
34020 IX86_BUILTIN_GATHER3SIV8DI);
34022 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
34023 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
34024 IX86_BUILTIN_GATHER3DIV16SI);
34026 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
34027 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
34028 IX86_BUILTIN_GATHER3DIV8DI);
34030 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
34031 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
34032 IX86_BUILTIN_GATHER3ALTSIV8DF);
34034 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
34035 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
34036 IX86_BUILTIN_GATHER3ALTDIV16SF);
34038 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
34039 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
34040 IX86_BUILTIN_GATHER3ALTSIV8DI);
34042 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
34043 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
34044 IX86_BUILTIN_GATHER3ALTDIV16SI);
34046 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
34047 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
34048 IX86_BUILTIN_SCATTERSIV16SF);
34050 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
34051 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
34052 IX86_BUILTIN_SCATTERSIV8DF);
34054 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
34055 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
34056 IX86_BUILTIN_SCATTERDIV16SF);
34058 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
34059 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
34060 IX86_BUILTIN_SCATTERDIV8DF);
34062 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
34063 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
34064 IX86_BUILTIN_SCATTERSIV16SI);
34066 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
34067 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
34068 IX86_BUILTIN_SCATTERSIV8DI);
34070 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
34071 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
34072 IX86_BUILTIN_SCATTERDIV16SI);
34074 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
34075 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
34076 IX86_BUILTIN_SCATTERDIV8DI);
34078 /* AVX512VL */
34079 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
34080 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
34081 IX86_BUILTIN_GATHER3SIV2DF);
34083 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
34084 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
34085 IX86_BUILTIN_GATHER3SIV4DF);
34087 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
34088 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
34089 IX86_BUILTIN_GATHER3DIV2DF);
34091 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
34092 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
34093 IX86_BUILTIN_GATHER3DIV4DF);
34095 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
34096 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
34097 IX86_BUILTIN_GATHER3SIV4SF);
34099 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
34100 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
34101 IX86_BUILTIN_GATHER3SIV8SF);
34103 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
34104 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
34105 IX86_BUILTIN_GATHER3DIV4SF);
34107 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
34108 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
34109 IX86_BUILTIN_GATHER3DIV8SF);
34111 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
34112 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
34113 IX86_BUILTIN_GATHER3SIV2DI);
34115 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
34116 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
34117 IX86_BUILTIN_GATHER3SIV4DI);
34119 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
34120 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
34121 IX86_BUILTIN_GATHER3DIV2DI);
34123 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
34124 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
34125 IX86_BUILTIN_GATHER3DIV4DI);
34127 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
34128 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
34129 IX86_BUILTIN_GATHER3SIV4SI);
34131 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
34132 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
34133 IX86_BUILTIN_GATHER3SIV8SI);
34135 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
34136 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
34137 IX86_BUILTIN_GATHER3DIV4SI);
34139 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
34140 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
34141 IX86_BUILTIN_GATHER3DIV8SI);
34143 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
34144 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
34145 IX86_BUILTIN_GATHER3ALTSIV4DF);
34147 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
34148 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
34149 IX86_BUILTIN_GATHER3ALTDIV8SF);
34151 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
34152 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
34153 IX86_BUILTIN_GATHER3ALTSIV4DI);
34155 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
34156 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
34157 IX86_BUILTIN_GATHER3ALTDIV8SI);
34159 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
34160 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
34161 IX86_BUILTIN_SCATTERSIV8SF);
34163 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
34164 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
34165 IX86_BUILTIN_SCATTERSIV4SF);
34167 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
34168 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
34169 IX86_BUILTIN_SCATTERSIV4DF);
34171 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
34172 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
34173 IX86_BUILTIN_SCATTERSIV2DF);
34175 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34176 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34177 IX86_BUILTIN_SCATTERDIV8SF);
34179 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34180 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34181 IX86_BUILTIN_SCATTERDIV4SF);
34183 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34184 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34185 IX86_BUILTIN_SCATTERDIV4DF);
34187 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34188 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34189 IX86_BUILTIN_SCATTERDIV2DF);
34191 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34192 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34193 IX86_BUILTIN_SCATTERSIV8SI);
34195 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34196 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34197 IX86_BUILTIN_SCATTERSIV4SI);
34199 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34200 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34201 IX86_BUILTIN_SCATTERSIV4DI);
34203 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34204 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34205 IX86_BUILTIN_SCATTERSIV2DI);
34207 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34208 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34209 IX86_BUILTIN_SCATTERDIV8SI);
34211 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34212 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34213 IX86_BUILTIN_SCATTERDIV4SI);
34215 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34216 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34217 IX86_BUILTIN_SCATTERDIV4DI);
34219 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34220 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34221 IX86_BUILTIN_SCATTERDIV2DI);
34223 /* AVX512PF */
34224 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34225 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34226 IX86_BUILTIN_GATHERPFDPD);
34227 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34228 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34229 IX86_BUILTIN_GATHERPFDPS);
34230 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34231 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34232 IX86_BUILTIN_GATHERPFQPD);
34233 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34234 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34235 IX86_BUILTIN_GATHERPFQPS);
34236 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34237 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34238 IX86_BUILTIN_SCATTERPFDPD);
34239 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34240 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34241 IX86_BUILTIN_SCATTERPFDPS);
34242 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34243 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34244 IX86_BUILTIN_SCATTERPFQPD);
34245 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34246 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34247 IX86_BUILTIN_SCATTERPFQPS);
34249 /* SHA */
34250 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34251 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34252 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34253 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34254 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34255 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34256 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34257 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34258 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34259 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34260 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34261 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34262 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34263 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34265 /* RTM. */
34266 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34267 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34269 /* MMX access to the vec_init patterns. */
34270 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34271 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34273 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34274 V4HI_FTYPE_HI_HI_HI_HI,
34275 IX86_BUILTIN_VEC_INIT_V4HI);
34277 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34278 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34279 IX86_BUILTIN_VEC_INIT_V8QI);
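/* Usage sketch (illustration only; follows the V2SI_FTYPE_INT_INT type
   registered above).  Assuming the usual MMX vector typedef
   __v2si (int __attribute__ ((vector_size (8)))):

     __v2si v = __builtin_ia32_vec_init_v2si (1, 2);
*/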
34281 /* Access to the vec_extract patterns. */
34282 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34283 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34284 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34285 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34286 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34287 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34288 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34289 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34290 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34291 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34293 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34294 "__builtin_ia32_vec_ext_v4hi",
34295 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34297 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34298 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34300 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34301 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34303 /* Access to the vec_set patterns. */
34304 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34305 "__builtin_ia32_vec_set_v2di",
34306 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34308 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34309 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34311 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34312 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34314 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34315 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34317 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34318 "__builtin_ia32_vec_set_v4hi",
34319 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34321 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34322 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34324 /* RDSEED */
34325 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34326 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34327 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34328 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34329 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34330 "__builtin_ia32_rdseed_di_step",
34331 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34333 /* ADCX */
34334 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34335 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34336 def_builtin (OPTION_MASK_ISA_64BIT,
34337 "__builtin_ia32_addcarryx_u64",
34338 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34339 IX86_BUILTIN_ADDCARRYX64);
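/* Usage sketch (illustration only; follows the
   UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED type registered above): chain a
   two-limb 64-bit addition through the returned carry byte.  a0/a1 and
   b0/b1 are assumed to be the low/high 32-bit limbs of the two operands.

     unsigned int lo, hi;
     unsigned char c;
     c = __builtin_ia32_addcarryx_u32 (0, a0, b0, &lo);
     c = __builtin_ia32_addcarryx_u32 (c, a1, b1, &hi);
*/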
34341 /* SBB */
34342 def_builtin (0, "__builtin_ia32_sbb_u32",
34343 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34344 def_builtin (OPTION_MASK_ISA_64BIT,
34345 "__builtin_ia32_sbb_u64",
34346 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34347 IX86_BUILTIN_SBB64);
34349 /* Read/write FLAGS. */
34350 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34351 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34352 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34353 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34354 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34355 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34356 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34357 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
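/* Usage sketch (illustration only): the read/write FLAGS builtins pair up
   to save and restore the flags register, per the UNSIGNED_FTYPE_VOID /
   VOID_FTYPE_UNSIGNED types above (and the UINT64 variants in 64-bit mode).

     unsigned long long flags = __builtin_ia32_readeflags_u64 ();
     __builtin_ia32_writeeflags_u64 (flags);
*/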
34359 /* CLFLUSHOPT. */
34360 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34361 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34363 /* CLWB. */
34364 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34365 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34367 /* MONITORX and MWAITX. */
34368 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
34369 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
34370 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
34371 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
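/* Usage sketch (illustration only; argument counts follow the FTYPEs
   registered above -- the extension/hint values shown are placeholders).

     __builtin_ia32_monitorx (addr, 0, 0);
     __builtin_ia32_mwaitx (0, 0, 0);

   `addr' is an assumed const void * naming the monitored address.  */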
34373 /* Add FMA4 and XOP multi-argument builtins. */
34374 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34376 if (d->name == 0)
34377 continue;
34379 ftype = (enum ix86_builtin_func_type) d->flag;
34380 def_builtin_const (d->mask, d->name, ftype, d->code);
34384 static void
34385 ix86_init_mpx_builtins ()
34387 const struct builtin_description * d;
34388 enum ix86_builtin_func_type ftype;
34389 tree decl;
34390 size_t i;
34392 for (i = 0, d = bdesc_mpx;
34393 i < ARRAY_SIZE (bdesc_mpx);
34394 i++, d++)
34396 if (d->name == 0)
34397 continue;
34399 ftype = (enum ix86_builtin_func_type) d->flag;
34400 decl = def_builtin (d->mask, d->name, ftype, d->code);
34402 /* Without the leaf and nothrow flags on MPX builtins,
34403 abnormal edges may follow their calls when setjmp
34404 is present in the function.  Since we may have a lot
34405 of MPX builtin calls, this causes lots of useless
34406 edges and enormous PHI nodes.  To avoid this, we mark
34407 MPX builtins as leaf and nothrow.  */
34408 if (decl)
34410 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34411 NULL_TREE);
34412 TREE_NOTHROW (decl) = 1;
34414 else
34416 ix86_builtins_isa[(int)d->code].leaf_p = true;
34417 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34421 for (i = 0, d = bdesc_mpx_const;
34422 i < ARRAY_SIZE (bdesc_mpx_const);
34423 i++, d++)
34425 if (d->name == 0)
34426 continue;
34428 ftype = (enum ix86_builtin_func_type) d->flag;
34429 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34431 if (decl)
34433 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34434 NULL_TREE);
34435 TREE_NOTHROW (decl) = 1;
34437 else
34439 ix86_builtins_isa[(int)d->code].leaf_p = true;
34440 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34445 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34446 to return a pointer to VERSION_DECL if the outcome of the expression
34447 formed by PREDICATE_CHAIN is true. This function will be called during
34448 version dispatch to decide which function version to execute. It returns
34449 the basic block at the end, to which more conditions can be added. */
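/* An illustrative sketch (not code from this file; names are placeholders)
   of the shape generated for one version guarded by two predicates:

     cond_1 = predicate_1 (arg_1);
     cond_2 = predicate_2 (arg_2);
     and_expr = MIN_EXPR <cond_2, cond_1>;
     if (and_expr > 0)
       return (void *) &VERSION_DECL;
     ... further conditions continue in the returned basic block ...  */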
34451 static basic_block
34452 add_condition_to_bb (tree function_decl, tree version_decl,
34453 tree predicate_chain, basic_block new_bb)
34455 gimple return_stmt;
34456 tree convert_expr, result_var;
34457 gimple convert_stmt;
34458 gimple call_cond_stmt;
34459 gimple if_else_stmt;
34461 basic_block bb1, bb2, bb3;
34462 edge e12, e23;
34464 tree cond_var, and_expr_var = NULL_TREE;
34465 gimple_seq gseq;
34467 tree predicate_decl, predicate_arg;
34469 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34471 gcc_assert (new_bb != NULL);
34472 gseq = bb_seq (new_bb);
34475 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34476 build_fold_addr_expr (version_decl));
34477 result_var = create_tmp_var (ptr_type_node);
34478 convert_stmt = gimple_build_assign (result_var, convert_expr);
34479 return_stmt = gimple_build_return (result_var);
34481 if (predicate_chain == NULL_TREE)
34483 gimple_seq_add_stmt (&gseq, convert_stmt);
34484 gimple_seq_add_stmt (&gseq, return_stmt);
34485 set_bb_seq (new_bb, gseq);
34486 gimple_set_bb (convert_stmt, new_bb);
34487 gimple_set_bb (return_stmt, new_bb);
34488 pop_cfun ();
34489 return new_bb;
34492 while (predicate_chain != NULL)
34494 cond_var = create_tmp_var (integer_type_node);
34495 predicate_decl = TREE_PURPOSE (predicate_chain);
34496 predicate_arg = TREE_VALUE (predicate_chain);
34497 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34498 gimple_call_set_lhs (call_cond_stmt, cond_var);
34500 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34501 gimple_set_bb (call_cond_stmt, new_bb);
34502 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34504 predicate_chain = TREE_CHAIN (predicate_chain);
34506 if (and_expr_var == NULL)
34507 and_expr_var = cond_var;
34508 else
34510 gimple assign_stmt;
34511 /* Use MIN_EXPR to check whether any of the integers is zero:
34512 and_expr_var = MIN_EXPR <cond_var, and_expr_var>.  */
34513 assign_stmt = gimple_build_assign (and_expr_var,
34514 build2 (MIN_EXPR, integer_type_node,
34515 cond_var, and_expr_var));
34517 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34518 gimple_set_bb (assign_stmt, new_bb);
34519 gimple_seq_add_stmt (&gseq, assign_stmt);
34523 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34524 integer_zero_node,
34525 NULL_TREE, NULL_TREE);
34526 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34527 gimple_set_bb (if_else_stmt, new_bb);
34528 gimple_seq_add_stmt (&gseq, if_else_stmt);
34530 gimple_seq_add_stmt (&gseq, convert_stmt);
34531 gimple_seq_add_stmt (&gseq, return_stmt);
34532 set_bb_seq (new_bb, gseq);
34534 bb1 = new_bb;
34535 e12 = split_block (bb1, if_else_stmt);
34536 bb2 = e12->dest;
34537 e12->flags &= ~EDGE_FALLTHRU;
34538 e12->flags |= EDGE_TRUE_VALUE;
34540 e23 = split_block (bb2, return_stmt);
34542 gimple_set_bb (convert_stmt, bb2);
34543 gimple_set_bb (return_stmt, bb2);
34545 bb3 = e23->dest;
34546 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34548 remove_edge (e23);
34549 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34551 pop_cfun ();
34553 return bb3;
34556 /* This parses the attribute arguments to target in DECL and determines
34557 the right builtin to use to match the platform specification.
34558 It returns the priority value for this version decl. If PREDICATE_LIST
34559 is not NULL, it stores the list of cpu features that need to be checked
34560 before dispatching this function. */
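/* Illustrative user-level sketch (hypothetical declarations, not part of
   this file):

     __attribute__ ((target ("default"))) int foo (void);
     __attribute__ ((target ("arch=corei7"))) int foo (void);
     __attribute__ ((target ("sse4.2"))) int foo (void);

   The default version gets priority 0, the arch= version P_PROC_SSE4_2
   and the sse4.2 version P_SSE4_2, so the arch= version is checked for
   dispatch first.  */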
34562 static unsigned int
34563 get_builtin_code_for_version (tree decl, tree *predicate_list)
34565 tree attrs;
34566 struct cl_target_option cur_target;
34567 tree target_node;
34568 struct cl_target_option *new_target;
34569 const char *arg_str = NULL;
34570 const char *attrs_str = NULL;
34571 char *tok_str = NULL;
34572 char *token;
34574 /* Priority of i386 features, greater value is higher priority. This is
34575 used to decide the order in which function dispatch must happen. For
34576 instance, a version specialized for SSE4.2 should be checked for dispatch
34577 before a version for SSE3, as SSE4.2 implies SSE3. */
34578 enum feature_priority
34580 P_ZERO = 0,
34581 P_MMX,
34582 P_SSE,
34583 P_SSE2,
34584 P_SSE3,
34585 P_SSSE3,
34586 P_PROC_SSSE3,
34587 P_SSE4_A,
34588 P_PROC_SSE4_A,
34589 P_SSE4_1,
34590 P_SSE4_2,
34591 P_PROC_SSE4_2,
34592 P_POPCNT,
34593 P_AES,
34594 P_PCLMUL,
34595 P_AVX,
34596 P_PROC_AVX,
34597 P_BMI,
34598 P_PROC_BMI,
34599 P_FMA4,
34600 P_XOP,
34601 P_PROC_XOP,
34602 P_FMA,
34603 P_PROC_FMA,
34604 P_BMI2,
34605 P_AVX2,
34606 P_PROC_AVX2,
34607 P_AVX512F,
34608 P_PROC_AVX512F
34611 enum feature_priority priority = P_ZERO;
34613 /* These are the target attribute strings for which a dispatcher is
34614 available, from fold_builtin_cpu. */
34616 static struct _feature_list
34618 const char *const name;
34619 const enum feature_priority priority;
34621 const feature_list[] =
34623 {"mmx", P_MMX},
34624 {"sse", P_SSE},
34625 {"sse2", P_SSE2},
34626 {"sse3", P_SSE3},
34627 {"sse4a", P_SSE4_A},
34628 {"ssse3", P_SSSE3},
34629 {"sse4.1", P_SSE4_1},
34630 {"sse4.2", P_SSE4_2},
34631 {"popcnt", P_POPCNT},
34632 {"aes", P_AES},
34633 {"pclmul", P_PCLMUL},
34634 {"avx", P_AVX},
34635 {"bmi", P_BMI},
34636 {"fma4", P_FMA4},
34637 {"xop", P_XOP},
34638 {"fma", P_FMA},
34639 {"bmi2", P_BMI2},
34640 {"avx2", P_AVX2},
34641 {"avx512f", P_AVX512F}
34645 static unsigned int NUM_FEATURES
34646 = sizeof (feature_list) / sizeof (struct _feature_list);
34648 unsigned int i;
34650 tree predicate_chain = NULL_TREE;
34651 tree predicate_decl, predicate_arg;
34653 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34654 gcc_assert (attrs != NULL);
34656 attrs = TREE_VALUE (TREE_VALUE (attrs));
34658 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34659 attrs_str = TREE_STRING_POINTER (attrs);
34661 /* Return priority zero for default function. */
34662 if (strcmp (attrs_str, "default") == 0)
34663 return 0;
34665 /* Handle arch= if specified. For priority, set it to be 1 more than
34666 the best instruction set the processor can handle. For instance, if
34667 there is a version for atom and a version for ssse3 (the highest ISA
34668 priority for atom), the atom version must be checked for dispatch
34669 before the ssse3 version. */
34670 if (strstr (attrs_str, "arch=") != NULL)
34672 cl_target_option_save (&cur_target, &global_options);
34673 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34674 &global_options_set);
34676 gcc_assert (target_node);
34677 new_target = TREE_TARGET_OPTION (target_node);
34678 gcc_assert (new_target);
34680 if (new_target->arch_specified && new_target->arch > 0)
34682 switch (new_target->arch)
34684 case PROCESSOR_CORE2:
34685 arg_str = "core2";
34686 priority = P_PROC_SSSE3;
34687 break;
34688 case PROCESSOR_NEHALEM:
34689 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34690 arg_str = "westmere";
34691 else
34692 /* We translate "arch=corei7" and "arch=nehalem" to
34693 "corei7" so that it will be mapped to M_INTEL_COREI7
34694 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34695 arg_str = "corei7";
34696 priority = P_PROC_SSE4_2;
34697 break;
34698 case PROCESSOR_SANDYBRIDGE:
34699 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34700 arg_str = "ivybridge";
34701 else
34702 arg_str = "sandybridge";
34703 priority = P_PROC_AVX;
34704 break;
34705 case PROCESSOR_HASWELL:
34706 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34707 arg_str = "broadwell";
34708 else
34709 arg_str = "haswell";
34710 priority = P_PROC_AVX2;
34711 break;
34712 case PROCESSOR_BONNELL:
34713 arg_str = "bonnell";
34714 priority = P_PROC_SSSE3;
34715 break;
34716 case PROCESSOR_KNL:
34717 arg_str = "knl";
34718 priority = P_PROC_AVX512F;
34719 break;
34720 case PROCESSOR_SILVERMONT:
34721 arg_str = "silvermont";
34722 priority = P_PROC_SSE4_2;
34723 break;
34724 case PROCESSOR_AMDFAM10:
34725 arg_str = "amdfam10h";
34726 priority = P_PROC_SSE4_A;
34727 break;
34728 case PROCESSOR_BTVER1:
34729 arg_str = "btver1";
34730 priority = P_PROC_SSE4_A;
34731 break;
34732 case PROCESSOR_BTVER2:
34733 arg_str = "btver2";
34734 priority = P_PROC_BMI;
34735 break;
34736 case PROCESSOR_BDVER1:
34737 arg_str = "bdver1";
34738 priority = P_PROC_XOP;
34739 break;
34740 case PROCESSOR_BDVER2:
34741 arg_str = "bdver2";
34742 priority = P_PROC_FMA;
34743 break;
34744 case PROCESSOR_BDVER3:
34745 arg_str = "bdver3";
34746 priority = P_PROC_FMA;
34747 break;
34748 case PROCESSOR_BDVER4:
34749 arg_str = "bdver4";
34750 priority = P_PROC_AVX2;
34751 break;
34755 cl_target_option_restore (&global_options, &cur_target);
34757 if (predicate_list && arg_str == NULL)
34759 error_at (DECL_SOURCE_LOCATION (decl),
34760 "No dispatcher found for the versioning attributes");
34761 return 0;
34764 if (predicate_list)
34766 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34767 /* For a C string literal the length includes the trailing NULL. */
34768 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34769 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34770 predicate_chain);
34774 /* Process feature name. */
34775 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34776 strcpy (tok_str, attrs_str);
34777 token = strtok (tok_str, ",");
34778 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34780 while (token != NULL)
34782 /* Do not process "arch=" */
34783 if (strncmp (token, "arch=", 5) == 0)
34785 token = strtok (NULL, ",");
34786 continue;
34788 for (i = 0; i < NUM_FEATURES; ++i)
34790 if (strcmp (token, feature_list[i].name) == 0)
34792 if (predicate_list)
34794 predicate_arg = build_string_literal (
34795 strlen (feature_list[i].name) + 1,
34796 feature_list[i].name);
34797 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34798 predicate_chain);
34800 /* Find the maximum priority feature. */
34801 if (feature_list[i].priority > priority)
34802 priority = feature_list[i].priority;
34804 break;
34807 if (predicate_list && i == NUM_FEATURES)
34809 error_at (DECL_SOURCE_LOCATION (decl),
34810 "No dispatcher found for %s", token);
34811 return 0;
34813 token = strtok (NULL, ",");
34815 free (tok_str);
34817 if (predicate_list && predicate_chain == NULL_TREE)
34819 error_at (DECL_SOURCE_LOCATION (decl),
34820 "No dispatcher found for the versioning attributes : %s",
34821 attrs_str);
34822 return 0;
34824 else if (predicate_list)
34826 predicate_chain = nreverse (predicate_chain);
34827 *predicate_list = predicate_chain;
34830 return priority;
34833 /* This compares the priority of target features in function DECL1
34834 and DECL2. It returns positive value if DECL1 is higher priority,
34835 negative value if DECL2 is higher priority and 0 if they are the
34836 same. */
34838 static int
34839 ix86_compare_version_priority (tree decl1, tree decl2)
34841 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34842 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34844 return (int)priority1 - (int)priority2;
34847 /* V1 and V2 point to function versions with different priorities
34848 based on the target ISA. This function compares their priorities. */
34850 static int
34851 feature_compare (const void *v1, const void *v2)
34853 typedef struct _function_version_info
34855 tree version_decl;
34856 tree predicate_chain;
34857 unsigned int dispatch_priority;
34858 } function_version_info;
34860 const function_version_info c1 = *(const function_version_info *)v1;
34861 const function_version_info c2 = *(const function_version_info *)v2;
34862 return (c2.dispatch_priority - c1.dispatch_priority);
34865 /* This function generates the dispatch function for
34866 multi-versioned functions. DISPATCH_DECL is the function which will
34867 contain the dispatch logic. FNDECLS are the function choices for
34868 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
34869 in DISPATCH_DECL in which the dispatch code is generated. */
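/* The resolver body that results looks roughly like this (illustrative
   sketch with hypothetical names):

     __builtin_cpu_init ();
     if (<predicates of the highest-priority version>)
       return &foo.arch_xyz;
     ...
     return &foo;    <- the default version, dispatched last  */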
34871 static int
34872 dispatch_function_versions (tree dispatch_decl,
34873 void *fndecls_p,
34874 basic_block *empty_bb)
34876 tree default_decl;
34877 gimple ifunc_cpu_init_stmt;
34878 gimple_seq gseq;
34879 int ix;
34880 tree ele;
34881 vec<tree> *fndecls;
34882 unsigned int num_versions = 0;
34883 unsigned int actual_versions = 0;
34884 unsigned int i;
34886 struct _function_version_info
34888 tree version_decl;
34889 tree predicate_chain;
34890 unsigned int dispatch_priority;
34891 }*function_version_info;
34893 gcc_assert (dispatch_decl != NULL
34894 && fndecls_p != NULL
34895 && empty_bb != NULL);
34897 /* fndecls_p is actually a vector.  */
34898 fndecls = static_cast<vec<tree> *> (fndecls_p);
34900 /* At least one more version other than the default. */
34901 num_versions = fndecls->length ();
34902 gcc_assert (num_versions >= 2);
34904 function_version_info = (struct _function_version_info *)
34905 XNEWVEC (struct _function_version_info, (num_versions - 1));
34907 /* The first version in the vector is the default decl. */
34908 default_decl = (*fndecls)[0];
34910 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34912 gseq = bb_seq (*empty_bb);
34913 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34914 constructors, so explicitly call __builtin_cpu_init here.  */
34915 ifunc_cpu_init_stmt = gimple_build_call_vec (
34916 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34917 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34918 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34919 set_bb_seq (*empty_bb, gseq);
34921 pop_cfun ();
34924 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34926 tree version_decl = ele;
34927 tree predicate_chain = NULL_TREE;
34928 unsigned int priority;
34929 /* Get attribute string, parse it and find the right predicate decl.
34930 The predicate function could be a lengthy combination of many
34931 features, like arch-type and various isa-variants. */
34932 priority = get_builtin_code_for_version (version_decl,
34933 &predicate_chain);
34935 if (predicate_chain == NULL_TREE)
34936 continue;
34938 function_version_info [actual_versions].version_decl = version_decl;
34939 function_version_info [actual_versions].predicate_chain
34940 = predicate_chain;
34941 function_version_info [actual_versions].dispatch_priority = priority;
34942 actual_versions++;
34945 /* Sort the versions according to descending order of dispatch priority. The
34946 priority is based on the ISA. This is not a perfect solution. There
34947 could still be ambiguity. If more than one function version is suitable
34948 to execute, which one should be dispatched?  In the future, allow the user
34949 to specify a dispatch priority next to the version. */
34950 qsort (function_version_info, actual_versions,
34951 sizeof (struct _function_version_info), feature_compare);
34953 for (i = 0; i < actual_versions; ++i)
34954 *empty_bb = add_condition_to_bb (dispatch_decl,
34955 function_version_info[i].version_decl,
34956 function_version_info[i].predicate_chain,
34957 *empty_bb);
34959 /* Dispatch the default version at the end.  */
34960 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34961 NULL, *empty_bb);
34963 free (function_version_info);
34964 return 0;
34967 /* Comparator function to be used in qsort routine to sort attribute
34968 specification strings to "target". */
34970 static int
34971 attr_strcmp (const void *v1, const void *v2)
34973 const char *c1 = *(char *const*)v1;
34974 const char *c2 = *(char *const*)v2;
34975 return strcmp (c1, c2);
34978 /* ARGLIST is the argument to target attribute. This function tokenizes
34979 the comma separated arguments, sorts them and returns a string which
34980 is a unique identifier for the comma separated arguments. It also
34981 replaces non-identifier characters "=,-" with "_". */
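/* For example (illustrative of the transformation below), the argument
   list "sse4.2,avx" is tokenized on ',', sorted and rejoined as
   "avx_sse4.2", with '=' and '-' also rewritten to '_'.  */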
34983 static char *
34984 sorted_attr_string (tree arglist)
34986 tree arg;
34987 size_t str_len_sum = 0;
34988 char **args = NULL;
34989 char *attr_str, *ret_str;
34990 char *attr = NULL;
34991 unsigned int argnum = 1;
34992 unsigned int i;
34994 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34996 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34997 size_t len = strlen (str);
34998 str_len_sum += len + 1;
34999 if (arg != arglist)
35000 argnum++;
35001 for (i = 0; i < strlen (str); i++)
35002 if (str[i] == ',')
35003 argnum++;
35006 attr_str = XNEWVEC (char, str_len_sum);
35007 str_len_sum = 0;
35008 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
35010 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
35011 size_t len = strlen (str);
35012 memcpy (attr_str + str_len_sum, str, len);
35013 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
35014 str_len_sum += len + 1;
35017 /* Replace "=,-" with "_". */
35018 for (i = 0; i < strlen (attr_str); i++)
35019 if (attr_str[i] == '=' || attr_str[i]== '-')
35020 attr_str[i] = '_';
35022 if (argnum == 1)
35023 return attr_str;
35025 args = XNEWVEC (char *, argnum);
35027 i = 0;
35028 attr = strtok (attr_str, ",");
35029 while (attr != NULL)
35031 args[i] = attr;
35032 i++;
35033 attr = strtok (NULL, ",");
35036 qsort (args, argnum, sizeof (char *), attr_strcmp);
35038 ret_str = XNEWVEC (char, str_len_sum);
35039 str_len_sum = 0;
35040 for (i = 0; i < argnum; i++)
35042 size_t len = strlen (args[i]);
35043 memcpy (ret_str + str_len_sum, args[i], len);
35044 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
35045 str_len_sum += len + 1;
35048 XDELETEVEC (args);
35049 XDELETEVEC (attr_str);
35050 return ret_str;
35053 /* This function changes the assembler name for functions that are
35054 versions. If DECL is a function version and has a "target"
35055 attribute, it appends the attribute string to its assembler name. */
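/* For instance (illustrative, following the sprintf below), a version of
   foo declared with __attribute__ ((target ("avx"))) is emitted under the
   assembler name "foo.avx".  */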
35057 static tree
35058 ix86_mangle_function_version_assembler_name (tree decl, tree id)
35060 tree version_attr;
35061 const char *orig_name, *version_string;
35062 char *attr_str, *assembler_name;
35064 if (DECL_DECLARED_INLINE_P (decl)
35065 && lookup_attribute ("gnu_inline",
35066 DECL_ATTRIBUTES (decl)))
35067 error_at (DECL_SOURCE_LOCATION (decl),
35068 "Function versions cannot be marked as gnu_inline,"
35069 " bodies have to be generated");
35071 if (DECL_VIRTUAL_P (decl)
35072 || DECL_VINDEX (decl))
35073 sorry ("Virtual function multiversioning not supported");
35075 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35077 /* The target attribute string cannot be NULL.  */
35078 gcc_assert (version_attr != NULL_TREE);
35080 orig_name = IDENTIFIER_POINTER (id);
35081 version_string
35082 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
35084 if (strcmp (version_string, "default") == 0)
35085 return id;
35087 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
35088 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
35090 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
35092 /* Allow assembler name to be modified if already set. */
35093 if (DECL_ASSEMBLER_NAME_SET_P (decl))
35094 SET_DECL_RTL (decl, NULL);
35096 tree ret = get_identifier (assembler_name);
35097 XDELETEVEC (attr_str);
35098 XDELETEVEC (assembler_name);
35099 return ret;
35102 /* This function returns true if FN1 and FN2 are versions of the same function,
35103 that is, the target strings of the function decls are different. This assumes
35104 that FN1 and FN2 have the same signature. */
35106 static bool
35107 ix86_function_versions (tree fn1, tree fn2)
35109 tree attr1, attr2;
35110 char *target1, *target2;
35111 bool result;
35113 if (TREE_CODE (fn1) != FUNCTION_DECL
35114 || TREE_CODE (fn2) != FUNCTION_DECL)
35115 return false;
35117 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
35118 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
35120 /* At least one function decl should have the target attribute specified. */
35121 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
35122 return false;
35124 /* Diagnose missing target attribute if one of the decls is already
35125 multi-versioned. */
35126 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
35128 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
35130 if (attr2 != NULL_TREE)
35132 std::swap (fn1, fn2);
35133 attr1 = attr2;
35135 error_at (DECL_SOURCE_LOCATION (fn2),
35136 "missing %<target%> attribute for multi-versioned %D",
35137 fn2);
35138 inform (DECL_SOURCE_LOCATION (fn1),
35139 "previous declaration of %D", fn1);
35140 /* Prevent diagnosing of the same error multiple times. */
35141 DECL_ATTRIBUTES (fn2)
35142 = tree_cons (get_identifier ("target"),
35143 copy_node (TREE_VALUE (attr1)),
35144 DECL_ATTRIBUTES (fn2));
35146 return false;
35149 target1 = sorted_attr_string (TREE_VALUE (attr1));
35150 target2 = sorted_attr_string (TREE_VALUE (attr2));
35152 /* The sorted target strings must be different for fn1 and fn2
35153 to be versions. */
35154 if (strcmp (target1, target2) == 0)
35155 result = false;
35156 else
35157 result = true;
35159 XDELETEVEC (target1);
35160 XDELETEVEC (target2);
35162 return result;
35165 static tree
35166 ix86_mangle_decl_assembler_name (tree decl, tree id)
35168 /* For function version, add the target suffix to the assembler name. */
35169 if (TREE_CODE (decl) == FUNCTION_DECL
35170 && DECL_FUNCTION_VERSIONED (decl))
35171 id = ix86_mangle_function_version_assembler_name (decl, id);
35172 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
35173 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
35174 #endif
35176 return id;
35179 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
35180 is true, append the full path name of the source file. */
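/* E.g. (illustrative), make_name (foo, "ifunc", false) yields "foo.ifunc"
   from foo's assembler name, while the unique variant becomes
   "foo.<unique file-based name>.ifunc".  */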
35182 static char *
35183 make_name (tree decl, const char *suffix, bool make_unique)
35185 char *global_var_name;
35186 int name_len;
35187 const char *name;
35188 const char *unique_name = NULL;
35190 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35192 /* Get a unique name that can be used globally without any chances
35193 of collision at link time. */
35194 if (make_unique)
35195 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
35197 name_len = strlen (name) + strlen (suffix) + 2;
35199 if (make_unique)
35200 name_len += strlen (unique_name) + 1;
35201 global_var_name = XNEWVEC (char, name_len);
35203 /* Use '.' to concatenate names as it is demangler friendly. */
35204 if (make_unique)
35205 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35206 suffix);
35207 else
35208 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35210 return global_var_name;
35213 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35215 /* Make a dispatcher declaration for the multi-versioned function DECL.
35216 Calls to DECL function will be replaced with calls to the dispatcher
35217 by the front-end. Return the decl created. */
35219 static tree
35220 make_dispatcher_decl (const tree decl)
35222 tree func_decl;
35223 char *func_name;
35224 tree fn_type, func_type;
35225 bool is_uniq = false;
35227 if (TREE_PUBLIC (decl) == 0)
35228 is_uniq = true;
35230 func_name = make_name (decl, "ifunc", is_uniq);
35232 fn_type = TREE_TYPE (decl);
35233 func_type = build_function_type (TREE_TYPE (fn_type),
35234 TYPE_ARG_TYPES (fn_type));
35236 func_decl = build_fn_decl (func_name, func_type);
35237 XDELETEVEC (func_name);
35238 TREE_USED (func_decl) = 1;
35239 DECL_CONTEXT (func_decl) = NULL_TREE;
35240 DECL_INITIAL (func_decl) = error_mark_node;
35241 DECL_ARTIFICIAL (func_decl) = 1;
35242 /* Mark this func as external, the resolver will flip it again if
35243 it gets generated. */
35244 DECL_EXTERNAL (func_decl) = 1;
35245 /* This decl will become an IFUNC; IFUNCs have to be externally visible.  */
35246 TREE_PUBLIC (func_decl) = 1;
35248 return func_decl;
35251 #endif
35253 /* Returns true if DECL is multi-versioned and is the default function,
35254 that is, it is not tagged with a target-specific optimization.  */
35256 static bool
35257 is_function_default_version (const tree decl)
35259 if (TREE_CODE (decl) != FUNCTION_DECL
35260 || !DECL_FUNCTION_VERSIONED (decl))
35261 return false;
35262 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35263 gcc_assert (attr);
35264 attr = TREE_VALUE (TREE_VALUE (attr));
35265 return (TREE_CODE (attr) == STRING_CST
35266 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35269 /* Make a dispatcher declaration for the multi-versioned function DECL.
35270 Calls to DECL function will be replaced with calls to the dispatcher
35271 by the front-end. Returns the decl of the dispatcher function. */
35273 static tree
35274 ix86_get_function_versions_dispatcher (void *decl)
35276 tree fn = (tree) decl;
35277 struct cgraph_node *node = NULL;
35278 struct cgraph_node *default_node = NULL;
35279 struct cgraph_function_version_info *node_v = NULL;
35280 struct cgraph_function_version_info *first_v = NULL;
35282 tree dispatch_decl = NULL;
35284 struct cgraph_function_version_info *default_version_info = NULL;
35286 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35288 node = cgraph_node::get (fn);
35289 gcc_assert (node != NULL);
35291 node_v = node->function_version ();
35292 gcc_assert (node_v != NULL);
35294 if (node_v->dispatcher_resolver != NULL)
35295 return node_v->dispatcher_resolver;
35297 /* Find the default version and make it the first node. */
35298 first_v = node_v;
35299 /* Go to the beginning of the chain. */
35300 while (first_v->prev != NULL)
35301 first_v = first_v->prev;
35302 default_version_info = first_v;
35303 while (default_version_info != NULL)
35305 if (is_function_default_version
35306 (default_version_info->this_node->decl))
35307 break;
35308 default_version_info = default_version_info->next;
35311 /* If there is no default node, just return NULL. */
35312 if (default_version_info == NULL)
35313 return NULL;
35315 /* Make default info the first node. */
35316 if (first_v != default_version_info)
35318 default_version_info->prev->next = default_version_info->next;
35319 if (default_version_info->next)
35320 default_version_info->next->prev = default_version_info->prev;
35321 first_v->prev = default_version_info;
35322 default_version_info->next = first_v;
35323 default_version_info->prev = NULL;
35326 default_node = default_version_info->this_node;
35328 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35329 if (targetm.has_ifunc_p ())
35331 struct cgraph_function_version_info *it_v = NULL;
35332 struct cgraph_node *dispatcher_node = NULL;
35333 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35335 /* Right now, the dispatching is done via ifunc. */
35336 dispatch_decl = make_dispatcher_decl (default_node->decl);
35338 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35339 gcc_assert (dispatcher_node != NULL);
35340 dispatcher_node->dispatcher_function = 1;
35341 dispatcher_version_info
35342 = dispatcher_node->insert_new_function_version ();
35343 dispatcher_version_info->next = default_version_info;
35344 dispatcher_node->definition = 1;
35346 /* Set the dispatcher for all the versions. */
35347 it_v = default_version_info;
35348 while (it_v != NULL)
35350 it_v->dispatcher_resolver = dispatch_decl;
35351 it_v = it_v->next;
35354 else
35355 #endif
35357 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35358 "multiversioning needs ifunc which is not supported "
35359 "on this target");
35362 return dispatch_decl;
35365 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35366 it to CHAIN. */
35368 static tree
35369 make_attribute (const char *name, const char *arg_name, tree chain)
35371 tree attr_name;
35372 tree attr_arg_name;
35373 tree attr_args;
35374 tree attr;
35376 attr_name = get_identifier (name);
35377 attr_arg_name = build_string (strlen (arg_name), arg_name);
35378 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35379 attr = tree_cons (attr_name, attr_args, chain);
35380 return attr;
35383 /* Make the resolver function decl to dispatch the versions of
35384 a multi-versioned function, DEFAULT_DECL. Create an
35385 empty basic block in the resolver and store the pointer in
35386 EMPTY_BB. Return the decl of the resolver function. */
35388 static tree
35389 make_resolver_func (const tree default_decl,
35390 const tree dispatch_decl,
35391 basic_block *empty_bb)
35393 char *resolver_name;
35394 tree decl, type, decl_name, t;
35395 bool is_uniq = false;
35397 /* IFUNCs have to be globally visible.  So, if the default_decl is
35398 not, then the name of the IFUNC should be made unique. */
35399 if (TREE_PUBLIC (default_decl) == 0)
35400 is_uniq = true;
35402 /* Append the filename to the resolver function if the versions are
35403 not externally visible. This is because the resolver function has
35404 to be externally visible for the loader to find it. So, appending
35405 the filename will prevent conflicts with a resolver function from
35406 another module which is based on the same version name. */
35407 resolver_name = make_name (default_decl, "resolver", is_uniq);
35409 /* The resolver function should return a (void *). */
35410 type = build_function_type_list (ptr_type_node, NULL_TREE);
35412 decl = build_fn_decl (resolver_name, type);
35413 decl_name = get_identifier (resolver_name);
35414 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35416 DECL_NAME (decl) = decl_name;
35417 TREE_USED (decl) = 1;
35418 DECL_ARTIFICIAL (decl) = 1;
35419 DECL_IGNORED_P (decl) = 0;
35420 /* IFUNC resolvers have to be externally visible. */
35421 TREE_PUBLIC (decl) = 1;
35422 DECL_UNINLINABLE (decl) = 1;
35424 /* Resolver is not external, body is generated. */
35425 DECL_EXTERNAL (decl) = 0;
35426 DECL_EXTERNAL (dispatch_decl) = 0;
35428 DECL_CONTEXT (decl) = NULL_TREE;
35429 DECL_INITIAL (decl) = make_node (BLOCK);
35430 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35432 if (DECL_COMDAT_GROUP (default_decl)
35433 || TREE_PUBLIC (default_decl))
35435 /* In this case, each translation unit with a call to this
35436 versioned function will put out a resolver. Ensure it
35437 is comdat to keep just one copy. */
35438 DECL_COMDAT (decl) = 1;
35439 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35441 /* Build result decl and add to function_decl. */
35442 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35443 DECL_ARTIFICIAL (t) = 1;
35444 DECL_IGNORED_P (t) = 1;
35445 DECL_RESULT (decl) = t;
35447 gimplify_function_tree (decl);
35448 push_cfun (DECL_STRUCT_FUNCTION (decl));
35449 *empty_bb = init_lowered_empty_function (decl, false, 0);
35451 cgraph_node::add_new_function (decl, true);
35452 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35454 pop_cfun ();
35456 gcc_assert (dispatch_decl != NULL);
35457 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35458 DECL_ATTRIBUTES (dispatch_decl)
35459 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35461 /* Create the alias for dispatch to resolver here. */
35462 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35463 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35464 XDELETEVEC (resolver_name);
35465 return decl;
35468 /* Generate the dispatching code body to dispatch multi-versioned function
35469 DECL. The target hook is called to process the "target" attributes and
35470 provide the code to dispatch the right function at run-time. NODE points
35471 to the dispatcher decl whose body will be created. */
35473 static tree
35474 ix86_generate_version_dispatcher_body (void *node_p)
35476 tree resolver_decl;
35477 basic_block empty_bb;
35478 tree default_ver_decl;
35479 struct cgraph_node *versn;
35480 struct cgraph_node *node;
35482 struct cgraph_function_version_info *node_version_info = NULL;
35483 struct cgraph_function_version_info *versn_info = NULL;
35485 node = (cgraph_node *)node_p;
35487 node_version_info = node->function_version ();
35488 gcc_assert (node->dispatcher_function
35489 && node_version_info != NULL);
35491 if (node_version_info->dispatcher_resolver)
35492 return node_version_info->dispatcher_resolver;
35494 /* The first version in the chain corresponds to the default version. */
35495 default_ver_decl = node_version_info->next->this_node->decl;
35497 /* node is going to be an alias, so remove the finalized bit. */
35498 node->definition = false;
35500 resolver_decl = make_resolver_func (default_ver_decl,
35501 node->decl, &empty_bb);
35503 node_version_info->dispatcher_resolver = resolver_decl;
35505 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35507 auto_vec<tree, 2> fn_ver_vec;
35509 for (versn_info = node_version_info->next; versn_info;
35510 versn_info = versn_info->next)
35512 versn = versn_info->this_node;
35513 /* Check for virtual functions here again, as by this time it should
35514 have been determined if this function needs a vtable index or
35515 not. This happens for methods in derived classes that override
35516 virtual methods in base classes but are not explicitly marked as
35517 virtual. */
35518 if (DECL_VINDEX (versn->decl))
35519 sorry ("Virtual function multiversioning not supported");
35521 fn_ver_vec.safe_push (versn->decl);
35524 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35525 cgraph_edge::rebuild_edges ();
35526 pop_cfun ();
35527 return resolver_decl;
35529 /* This builds the processor_model struct type defined in
35530 libgcc/config/i386/cpuinfo.c */
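/* The layout mirrored here corresponds to (sketch of the cpuinfo.c struct):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };  */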
35532 static tree
35533 build_processor_model_struct (void)
35535 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35536 "__cpu_features"};
35537 tree field = NULL_TREE, field_chain = NULL_TREE;
35538 int i;
35539 tree type = make_node (RECORD_TYPE);
35541 /* The first 3 fields are unsigned int. */
35542 for (i = 0; i < 3; ++i)
35544 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35545 get_identifier (field_name[i]), unsigned_type_node);
35546 if (field_chain != NULL_TREE)
35547 DECL_CHAIN (field) = field_chain;
35548 field_chain = field;
35551 /* The last field is an array of unsigned integers of size one. */
35552 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35553 get_identifier (field_name[3]),
35554 build_array_type (unsigned_type_node,
35555 build_index_type (size_one_node)));
35556 if (field_chain != NULL_TREE)
35557 DECL_CHAIN (field) = field_chain;
35558 field_chain = field;
35560 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35561 return type;
35564 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35566 static tree
35567 make_var_decl (tree type, const char *name)
35569 tree new_decl;
35571 new_decl = build_decl (UNKNOWN_LOCATION,
35572 VAR_DECL,
35573 get_identifier(name),
35574 type);
35576 DECL_EXTERNAL (new_decl) = 1;
35577 TREE_STATIC (new_decl) = 1;
35578 TREE_PUBLIC (new_decl) = 1;
35579 DECL_INITIAL (new_decl) = 0;
35580 DECL_ARTIFICIAL (new_decl) = 0;
35581 DECL_PRESERVE_P (new_decl) = 1;
35583 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35584 assemble_variable (new_decl, 0, 0, 0);
35586 return new_decl;
35589 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35590 into an integer defined in libgcc/config/i386/cpuinfo.c */
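/* For example (illustrative of the folding below), __builtin_cpu_is ("amd")
   folds to a comparison of __cpu_model.__cpu_vendor against M_AMD, and
   __builtin_cpu_supports ("avx") folds to
   __cpu_model.__cpu_features[0] & (1 << F_AVX).  */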
35592 static tree
35593 fold_builtin_cpu (tree fndecl, tree *args)
35595 unsigned int i;
35596 enum ix86_builtins fn_code = (enum ix86_builtins)
35597 DECL_FUNCTION_CODE (fndecl);
35598 tree param_string_cst = NULL;
35600 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35601 enum processor_features
35603 F_CMOV = 0,
35604 F_MMX,
35605 F_POPCNT,
35606 F_SSE,
35607 F_SSE2,
35608 F_SSE3,
35609 F_SSSE3,
35610 F_SSE4_1,
35611 F_SSE4_2,
35612 F_AVX,
35613 F_AVX2,
35614 F_SSE4_A,
35615 F_FMA4,
35616 F_XOP,
35617 F_FMA,
35618 F_AVX512F,
35619 F_BMI,
35620 F_BMI2,
35621 F_AES,
35622 F_PCLMUL,
35623 F_MAX
35626 /* These are the values for vendor types, cpu types and subtypes
35627 in cpuinfo.c.  Cpu types and subtypes should have the
35628 corresponding start value subtracted from them.  */
35629 enum processor_model
35631 M_INTEL = 1,
35632 M_AMD,
35633 M_CPU_TYPE_START,
35634 M_INTEL_BONNELL,
35635 M_INTEL_CORE2,
35636 M_INTEL_COREI7,
35637 M_AMDFAM10H,
35638 M_AMDFAM15H,
35639 M_INTEL_SILVERMONT,
35640 M_INTEL_KNL,
35641 M_AMD_BTVER1,
35642 M_AMD_BTVER2,
35643 M_CPU_SUBTYPE_START,
35644 M_INTEL_COREI7_NEHALEM,
35645 M_INTEL_COREI7_WESTMERE,
35646 M_INTEL_COREI7_SANDYBRIDGE,
35647 M_AMDFAM10H_BARCELONA,
35648 M_AMDFAM10H_SHANGHAI,
35649 M_AMDFAM10H_ISTANBUL,
35650 M_AMDFAM15H_BDVER1,
35651 M_AMDFAM15H_BDVER2,
35652 M_AMDFAM15H_BDVER3,
35653 M_AMDFAM15H_BDVER4,
35654 M_INTEL_COREI7_IVYBRIDGE,
35655 M_INTEL_COREI7_HASWELL,
35656 M_INTEL_COREI7_BROADWELL,
35657 M_INTEL_COREI7_SKYLAKE
35660 static struct _arch_names_table
35662 const char *const name;
35663 const enum processor_model model;
35665 const arch_names_table[] =
35667 {"amd", M_AMD},
35668 {"intel", M_INTEL},
35669 {"atom", M_INTEL_BONNELL},
35670 {"slm", M_INTEL_SILVERMONT},
35671 {"core2", M_INTEL_CORE2},
35672 {"corei7", M_INTEL_COREI7},
35673 {"nehalem", M_INTEL_COREI7_NEHALEM},
35674 {"westmere", M_INTEL_COREI7_WESTMERE},
35675 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35676 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35677 {"haswell", M_INTEL_COREI7_HASWELL},
35678 {"broadwell", M_INTEL_COREI7_BROADWELL},
35679 {"skylake", M_INTEL_COREI7_SKYLAKE},
35680 {"bonnell", M_INTEL_BONNELL},
35681 {"silvermont", M_INTEL_SILVERMONT},
35682 {"knl", M_INTEL_KNL},
35683 {"amdfam10h", M_AMDFAM10H},
35684 {"barcelona", M_AMDFAM10H_BARCELONA},
35685 {"shanghai", M_AMDFAM10H_SHANGHAI},
35686 {"istanbul", M_AMDFAM10H_ISTANBUL},
35687 {"btver1", M_AMD_BTVER1},
35688 {"amdfam15h", M_AMDFAM15H},
35689 {"bdver1", M_AMDFAM15H_BDVER1},
35690 {"bdver2", M_AMDFAM15H_BDVER2},
35691 {"bdver3", M_AMDFAM15H_BDVER3},
35692 {"bdver4", M_AMDFAM15H_BDVER4},
35693 {"btver2", M_AMD_BTVER2},
35696 static struct _isa_names_table
35698 const char *const name;
35699 const enum processor_features feature;
35701 const isa_names_table[] =
35703 {"cmov", F_CMOV},
35704 {"mmx", F_MMX},
35705 {"popcnt", F_POPCNT},
35706 {"sse", F_SSE},
35707 {"sse2", F_SSE2},
35708 {"sse3", F_SSE3},
35709 {"ssse3", F_SSSE3},
35710 {"sse4a", F_SSE4_A},
35711 {"sse4.1", F_SSE4_1},
35712 {"sse4.2", F_SSE4_2},
35713 {"avx", F_AVX},
35714 {"fma4", F_FMA4},
35715 {"xop", F_XOP},
35716 {"fma", F_FMA},
35717 {"avx2", F_AVX2},
35718 {"avx512f",F_AVX512F},
35719 {"bmi", F_BMI},
35720 {"bmi2", F_BMI2},
35721 {"aes", F_AES},
35722 {"pclmul", F_PCLMUL}
35725 tree __processor_model_type = build_processor_model_struct ();
35726 tree __cpu_model_var = make_var_decl (__processor_model_type,
35727 "__cpu_model");
35730 varpool_node::add (__cpu_model_var);
35732 gcc_assert ((args != NULL) && (*args != NULL));
35734 param_string_cst = *args;
35735 while (param_string_cst
35736 && TREE_CODE (param_string_cst) != STRING_CST)
35738 /* *args must be an expr that can contain other EXPRs leading to a
35739 STRING_CST. */
35740 if (!EXPR_P (param_string_cst))
35742 error ("Parameter to builtin must be a string constant or literal");
35743 return integer_zero_node;
35745 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35748 gcc_assert (param_string_cst);
35750 if (fn_code == IX86_BUILTIN_CPU_IS)
35752 tree ref;
35753 tree field;
35754 tree final;
35756 unsigned int field_val = 0;
35757 unsigned int NUM_ARCH_NAMES
35758 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35760 for (i = 0; i < NUM_ARCH_NAMES; i++)
35761 if (strcmp (arch_names_table[i].name,
35762 TREE_STRING_POINTER (param_string_cst)) == 0)
35763 break;
35765 if (i == NUM_ARCH_NAMES)
35767 error ("Parameter to builtin not valid: %s",
35768 TREE_STRING_POINTER (param_string_cst));
35769 return integer_zero_node;
35772 field = TYPE_FIELDS (__processor_model_type);
35773 field_val = arch_names_table[i].model;
35775 /* CPU types are stored in the next field. */
35776 if (field_val > M_CPU_TYPE_START
35777 && field_val < M_CPU_SUBTYPE_START)
35779 field = DECL_CHAIN (field);
35780 field_val -= M_CPU_TYPE_START;
35783 /* CPU subtypes are stored in the next field. */
35784 if (field_val > M_CPU_SUBTYPE_START)
35786 field = DECL_CHAIN ( DECL_CHAIN (field));
35787 field_val -= M_CPU_SUBTYPE_START;
35790 /* Get the appropriate field in __cpu_model. */
35791 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35792 field, NULL_TREE);
35794 /* Check the value. */
35795 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35796 build_int_cstu (unsigned_type_node, field_val));
35797 return build1 (CONVERT_EXPR, integer_type_node, final);
35799 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35801 tree ref;
35802 tree array_elt;
35803 tree field;
35804 tree final;
35806 unsigned int field_val = 0;
35807 unsigned int NUM_ISA_NAMES
35808 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35810 for (i = 0; i < NUM_ISA_NAMES; i++)
35811 if (strcmp (isa_names_table[i].name,
35812 TREE_STRING_POINTER (param_string_cst)) == 0)
35813 break;
35815 if (i == NUM_ISA_NAMES)
35817 error ("Parameter to builtin not valid: %s",
35818 TREE_STRING_POINTER (param_string_cst));
35819 return integer_zero_node;
35822 field = TYPE_FIELDS (__processor_model_type);
35823 /* Get the last field, which is __cpu_features. */
35824 while (DECL_CHAIN (field))
35825 field = DECL_CHAIN (field);
35827 /* Get the appropriate field: __cpu_model.__cpu_features */
35828 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35829 field, NULL_TREE);
35831 /* Access the 0th element of __cpu_features array. */
35832 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35833 integer_zero_node, NULL_TREE, NULL_TREE);
35835 field_val = (1 << isa_names_table[i].feature);
35836 /* Return __cpu_model.__cpu_features[0] & field_val */
35837 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35838 build_int_cstu (unsigned_type_node, field_val));
35839 return build1 (CONVERT_EXPR, integer_type_node, final);
35841 gcc_unreachable ();
35844 static tree
35845 ix86_fold_builtin (tree fndecl, int n_args,
35846 tree *args, bool ignore ATTRIBUTE_UNUSED)
35848 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35850 enum ix86_builtins fn_code = (enum ix86_builtins)
35851 DECL_FUNCTION_CODE (fndecl);
35852 if (fn_code == IX86_BUILTIN_CPU_IS
35853 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35855 gcc_assert (n_args == 1);
35856 return fold_builtin_cpu (fndecl, args);
35860 #ifdef SUBTARGET_FOLD_BUILTIN
35861 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35862 #endif
35864 return NULL_TREE;
35867 /* Make builtins to detect cpu type and features supported. NAME is
35868 the builtin name, CODE is the builtin code, and FTYPE is the function
35869 type of the builtin. */
35871 static void
35872 make_cpu_type_builtin (const char* name, int code,
35873 enum ix86_builtin_func_type ftype, bool is_const)
35875 tree decl;
35876 tree type;
35878 type = ix86_get_builtin_func_type (ftype);
35879 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35880 NULL, NULL_TREE);
35881 gcc_assert (decl != NULL_TREE);
35882 ix86_builtins[(int) code] = decl;
35883 TREE_READONLY (decl) = is_const;
35886 /* Make builtins to get CPU type and features supported. The created
35887 builtins are:
35889 __builtin_cpu_init (), to detect cpu type and features,
35890 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35891 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
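/* A minimal usage sketch (user code, not part of this file):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("corei7") || __builtin_cpu_supports ("avx2"))
       ...

   The accepted strings are those handled by fold_builtin_cpu above.  */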
35894 static void
35895 ix86_init_platform_type_builtins (void)
35897 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35898 INT_FTYPE_VOID, false);
35899 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35900 INT_FTYPE_PCCHAR, true);
35901 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35902 INT_FTYPE_PCCHAR, true);
35905 /* Internal method for ix86_init_builtins. */
35907 static void
35908 ix86_init_builtins_va_builtins_abi (void)
35910 tree ms_va_ref, sysv_va_ref;
35911 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35912 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35913 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35914 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35916 if (!TARGET_64BIT)
35917 return;
35918 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35919 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35920 ms_va_ref = build_reference_type (ms_va_list_type_node);
35921 sysv_va_ref =
35922 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35924 fnvoid_va_end_ms =
35925 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35926 fnvoid_va_start_ms =
35927 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35928 fnvoid_va_end_sysv =
35929 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35930 fnvoid_va_start_sysv =
35931 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35932 NULL_TREE);
35933 fnvoid_va_copy_ms =
35934 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35935 NULL_TREE);
35936 fnvoid_va_copy_sysv =
35937 build_function_type_list (void_type_node, sysv_va_ref,
35938 sysv_va_ref, NULL_TREE);
35940 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35941 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35942 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35943 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35944 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35945 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35946 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35947 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35948 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35949 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35950 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35951 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35954 static void
35955 ix86_init_builtin_types (void)
35957 tree float128_type_node, float80_type_node;
35959 /* The __float80 type. */
35960 float80_type_node = long_double_type_node;
35961 if (TYPE_MODE (float80_type_node) != XFmode)
35963 /* The __float80 type. */
35964 float80_type_node = make_node (REAL_TYPE);
35966 TYPE_PRECISION (float80_type_node) = 80;
35967 layout_type (float80_type_node);
35969 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35971 /* The __float128 type. */
35972 float128_type_node = make_node (REAL_TYPE);
35973 TYPE_PRECISION (float128_type_node) = 128;
35974 layout_type (float128_type_node);
35975 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35977 /* This macro is built by i386-builtin-types.awk. */
35978 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35981 static void
35982 ix86_init_builtins (void)
35984 tree t;
35986 ix86_init_builtin_types ();
35988 /* Builtins to get CPU type and features. */
35989 ix86_init_platform_type_builtins ();
35991 /* TFmode support builtins. */
35992 def_builtin_const (0, "__builtin_infq",
35993 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35994 def_builtin_const (0, "__builtin_huge_valq",
35995 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35997 /* We will expand them to normal calls if SSE isn't available since
35998 they are used by libgcc. */
35999 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
36000 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
36001 BUILT_IN_MD, "__fabstf2", NULL_TREE);
36002 TREE_READONLY (t) = 1;
36003 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
36005 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
36006 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
36007 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
36008 TREE_READONLY (t) = 1;
36009 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
36011 ix86_init_tm_builtins ();
36012 ix86_init_mmx_sse_builtins ();
36013 ix86_init_mpx_builtins ();
36015 if (TARGET_LP64)
36016 ix86_init_builtins_va_builtins_abi ();
36018 #ifdef SUBTARGET_INIT_BUILTINS
36019 SUBTARGET_INIT_BUILTINS;
36020 #endif
36023 /* Return the ix86 builtin for CODE. */
36025 static tree
36026 ix86_builtin_decl (unsigned code, bool)
36028 if (code >= IX86_BUILTIN_MAX)
36029 return error_mark_node;
36031 return ix86_builtins[code];
36034 /* Errors in the source file can cause expand_expr to return const0_rtx
36035 where we expect a vector. To avoid crashing, use one of the vector
36036 clear instructions. */
36037 static rtx
36038 safe_vector_operand (rtx x, machine_mode mode)
36040 if (x == const0_rtx)
36041 x = CONST0_RTX (mode);
36042 return x;
36045 /* Fixup modeless constants to fit required mode. */
36046 static rtx
36047 fixup_modeless_constant (rtx x, machine_mode mode)
36049 if (GET_MODE (x) == VOIDmode)
36050 x = convert_to_mode (mode, x, 1);
36051 return x;
36054 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
36056 static rtx
36057 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
36059 rtx pat;
36060 tree arg0 = CALL_EXPR_ARG (exp, 0);
36061 tree arg1 = CALL_EXPR_ARG (exp, 1);
36062 rtx op0 = expand_normal (arg0);
36063 rtx op1 = expand_normal (arg1);
36064 machine_mode tmode = insn_data[icode].operand[0].mode;
36065 machine_mode mode0 = insn_data[icode].operand[1].mode;
36066 machine_mode mode1 = insn_data[icode].operand[2].mode;
36068 if (VECTOR_MODE_P (mode0))
36069 op0 = safe_vector_operand (op0, mode0);
36070 if (VECTOR_MODE_P (mode1))
36071 op1 = safe_vector_operand (op1, mode1);
36073 if (optimize || !target
36074 || GET_MODE (target) != tmode
36075 || !insn_data[icode].operand[0].predicate (target, tmode))
36076 target = gen_reg_rtx (tmode);
36078 if (GET_MODE (op1) == SImode && mode1 == TImode)
36080 rtx x = gen_reg_rtx (V4SImode);
36081 emit_insn (gen_sse2_loadd (x, op1));
36082 op1 = gen_lowpart (TImode, x);
36085 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36086 op0 = copy_to_mode_reg (mode0, op0);
36087 if (!insn_data[icode].operand[2].predicate (op1, mode1))
36088 op1 = copy_to_mode_reg (mode1, op1);
36090 pat = GEN_FCN (icode) (target, op0, op1);
36091 if (! pat)
36092 return 0;
36094 emit_insn (pat);
36096 return target;
36099 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
36101 static rtx
36102 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
36103 enum ix86_builtin_func_type m_type,
36104 enum rtx_code sub_code)
36106 rtx pat;
36107 int i;
36108 int nargs;
36109 bool comparison_p = false;
36110 bool tf_p = false;
36111 bool last_arg_constant = false;
36112 int num_memory = 0;
36113 struct {
36114 rtx op;
36115 machine_mode mode;
36116 } args[4];
36118 machine_mode tmode = insn_data[icode].operand[0].mode;
36120 switch (m_type)
36122 case MULTI_ARG_4_DF2_DI_I:
36123 case MULTI_ARG_4_DF2_DI_I1:
36124 case MULTI_ARG_4_SF2_SI_I:
36125 case MULTI_ARG_4_SF2_SI_I1:
36126 nargs = 4;
36127 last_arg_constant = true;
36128 break;
36130 case MULTI_ARG_3_SF:
36131 case MULTI_ARG_3_DF:
36132 case MULTI_ARG_3_SF2:
36133 case MULTI_ARG_3_DF2:
36134 case MULTI_ARG_3_DI:
36135 case MULTI_ARG_3_SI:
36136 case MULTI_ARG_3_SI_DI:
36137 case MULTI_ARG_3_HI:
36138 case MULTI_ARG_3_HI_SI:
36139 case MULTI_ARG_3_QI:
36140 case MULTI_ARG_3_DI2:
36141 case MULTI_ARG_3_SI2:
36142 case MULTI_ARG_3_HI2:
36143 case MULTI_ARG_3_QI2:
36144 nargs = 3;
36145 break;
36147 case MULTI_ARG_2_SF:
36148 case MULTI_ARG_2_DF:
36149 case MULTI_ARG_2_DI:
36150 case MULTI_ARG_2_SI:
36151 case MULTI_ARG_2_HI:
36152 case MULTI_ARG_2_QI:
36153 nargs = 2;
36154 break;
36156 case MULTI_ARG_2_DI_IMM:
36157 case MULTI_ARG_2_SI_IMM:
36158 case MULTI_ARG_2_HI_IMM:
36159 case MULTI_ARG_2_QI_IMM:
36160 nargs = 2;
36161 last_arg_constant = true;
36162 break;
36164 case MULTI_ARG_1_SF:
36165 case MULTI_ARG_1_DF:
36166 case MULTI_ARG_1_SF2:
36167 case MULTI_ARG_1_DF2:
36168 case MULTI_ARG_1_DI:
36169 case MULTI_ARG_1_SI:
36170 case MULTI_ARG_1_HI:
36171 case MULTI_ARG_1_QI:
36172 case MULTI_ARG_1_SI_DI:
36173 case MULTI_ARG_1_HI_DI:
36174 case MULTI_ARG_1_HI_SI:
36175 case MULTI_ARG_1_QI_DI:
36176 case MULTI_ARG_1_QI_SI:
36177 case MULTI_ARG_1_QI_HI:
36178 nargs = 1;
36179 break;
36181 case MULTI_ARG_2_DI_CMP:
36182 case MULTI_ARG_2_SI_CMP:
36183 case MULTI_ARG_2_HI_CMP:
36184 case MULTI_ARG_2_QI_CMP:
36185 nargs = 2;
36186 comparison_p = true;
36187 break;
36189 case MULTI_ARG_2_SF_TF:
36190 case MULTI_ARG_2_DF_TF:
36191 case MULTI_ARG_2_DI_TF:
36192 case MULTI_ARG_2_SI_TF:
36193 case MULTI_ARG_2_HI_TF:
36194 case MULTI_ARG_2_QI_TF:
36195 nargs = 2;
36196 tf_p = true;
36197 break;
36199 default:
36200 gcc_unreachable ();
36203 if (optimize || !target
36204 || GET_MODE (target) != tmode
36205 || !insn_data[icode].operand[0].predicate (target, tmode))
36206 target = gen_reg_rtx (tmode);
36208 gcc_assert (nargs <= 4);
36210 for (i = 0; i < nargs; i++)
36212 tree arg = CALL_EXPR_ARG (exp, i);
36213 rtx op = expand_normal (arg);
36214 int adjust = (comparison_p) ? 1 : 0;
36215 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36217 if (last_arg_constant && i == nargs - 1)
36219 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36221 enum insn_code new_icode = icode;
36222 switch (icode)
36224 case CODE_FOR_xop_vpermil2v2df3:
36225 case CODE_FOR_xop_vpermil2v4sf3:
36226 case CODE_FOR_xop_vpermil2v4df3:
36227 case CODE_FOR_xop_vpermil2v8sf3:
36228 error ("the last argument must be a 2-bit immediate");
36229 return gen_reg_rtx (tmode);
36230 case CODE_FOR_xop_rotlv2di3:
36231 new_icode = CODE_FOR_rotlv2di3;
36232 goto xop_rotl;
36233 case CODE_FOR_xop_rotlv4si3:
36234 new_icode = CODE_FOR_rotlv4si3;
36235 goto xop_rotl;
36236 case CODE_FOR_xop_rotlv8hi3:
36237 new_icode = CODE_FOR_rotlv8hi3;
36238 goto xop_rotl;
36239 case CODE_FOR_xop_rotlv16qi3:
36240 new_icode = CODE_FOR_rotlv16qi3;
36241 xop_rotl:
36242 if (CONST_INT_P (op))
36244 int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1;
36245 op = GEN_INT (INTVAL (op) & mask);
36246 gcc_checking_assert
36247 (insn_data[icode].operand[i + 1].predicate (op, mode));
36249 else
36251 gcc_checking_assert
36252 (nargs == 2
36253 && insn_data[new_icode].operand[0].mode == tmode
36254 && insn_data[new_icode].operand[1].mode == tmode
36255 && insn_data[new_icode].operand[2].mode == mode
36256 && insn_data[new_icode].operand[0].predicate
36257 == insn_data[icode].operand[0].predicate
36258 && insn_data[new_icode].operand[1].predicate
36259 == insn_data[icode].operand[1].predicate);
36260 icode = new_icode;
36261 goto non_constant;
36263 break;
36264 default:
36265 gcc_unreachable ();
36269 else
36271 non_constant:
36272 if (VECTOR_MODE_P (mode))
36273 op = safe_vector_operand (op, mode);
36275 /* If we aren't optimizing, only allow one memory operand to be
36276 generated. */
36277 if (memory_operand (op, mode))
36278 num_memory++;
36280 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36282 if (optimize
36283 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36284 || num_memory > 1)
36285 op = force_reg (mode, op);
36288 args[i].op = op;
36289 args[i].mode = mode;
36292 switch (nargs)
36294 case 1:
36295 pat = GEN_FCN (icode) (target, args[0].op);
36296 break;
36298 case 2:
36299 if (tf_p)
36300 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36301 GEN_INT ((int)sub_code));
36302 else if (! comparison_p)
36303 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36304 else
36306 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36307 args[0].op,
36308 args[1].op);
36310 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36312 break;
36314 case 3:
36315 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36316 break;
36318 case 4:
36319 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36320 break;
36322 default:
36323 gcc_unreachable ();
36326 if (! pat)
36327 return 0;
36329 emit_insn (pat);
36330 return target;
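/* Illustrative note (added here, not part of the original source): for the
   MULTI_ARG_2_*_CMP types above the expander synthesizes the comparison
   RTX itself, roughly

     cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
			      args[0].op, args[1].op);
     pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);

   so the XOP compare patterns receive both the comparison and its
   operands explicitly.  */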
36333 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36334 insns with vec_merge. */
36336 static rtx
36337 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36338 rtx target)
36340 rtx pat;
36341 tree arg0 = CALL_EXPR_ARG (exp, 0);
36342 rtx op1, op0 = expand_normal (arg0);
36343 machine_mode tmode = insn_data[icode].operand[0].mode;
36344 machine_mode mode0 = insn_data[icode].operand[1].mode;
36346 if (optimize || !target
36347 || GET_MODE (target) != tmode
36348 || !insn_data[icode].operand[0].predicate (target, tmode))
36349 target = gen_reg_rtx (tmode);
36351 if (VECTOR_MODE_P (mode0))
36352 op0 = safe_vector_operand (op0, mode0);
36354 if ((optimize && !register_operand (op0, mode0))
36355 || !insn_data[icode].operand[1].predicate (op0, mode0))
36356 op0 = copy_to_mode_reg (mode0, op0);
36358 op1 = op0;
36359 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36360 op1 = copy_to_mode_reg (mode0, op1);
36362 pat = GEN_FCN (icode) (target, op0, op1);
36363 if (! pat)
36364 return 0;
36365 emit_insn (pat);
36366 return target;
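/* Illustrative note (added, not in the original file): the single source
   operand is passed twice (op1 = op0), i.e. GEN_FCN (icode) (target, op0,
   op0), so a scalar unary vec_merge pattern computes the operation on
   element 0 and merges the remaining elements back from the same input
   vector.  */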
36369 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36371 static rtx
36372 ix86_expand_sse_compare (const struct builtin_description *d,
36373 tree exp, rtx target, bool swap)
36375 rtx pat;
36376 tree arg0 = CALL_EXPR_ARG (exp, 0);
36377 tree arg1 = CALL_EXPR_ARG (exp, 1);
36378 rtx op0 = expand_normal (arg0);
36379 rtx op1 = expand_normal (arg1);
36380 rtx op2;
36381 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36382 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36383 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36384 enum rtx_code comparison = d->comparison;
36386 if (VECTOR_MODE_P (mode0))
36387 op0 = safe_vector_operand (op0, mode0);
36388 if (VECTOR_MODE_P (mode1))
36389 op1 = safe_vector_operand (op1, mode1);
36391 /* Swap operands if we have a comparison that isn't available in
36392 hardware. */
36393 if (swap)
36394 std::swap (op0, op1);
36396 if (optimize || !target
36397 || GET_MODE (target) != tmode
36398 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36399 target = gen_reg_rtx (tmode);
36401 if ((optimize && !register_operand (op0, mode0))
36402 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36403 op0 = copy_to_mode_reg (mode0, op0);
36404 if ((optimize && !register_operand (op1, mode1))
36405 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36406 op1 = copy_to_mode_reg (mode1, op1);
36408 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36409 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36410 if (! pat)
36411 return 0;
36412 emit_insn (pat);
36413 return target;
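/* Illustrative sketch (added, not in the original file): d->comparison
   supplies the rtx code for the packed compare, so the expansion is
   effectively

     op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
     pat = GEN_FCN (d->icode) (target, op0, op1, op2);

   with op0/op1 pre-swapped above when only the reversed form exists in
   hardware.  */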
36416 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36418 static rtx
36419 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36420 rtx target)
36422 rtx pat;
36423 tree arg0 = CALL_EXPR_ARG (exp, 0);
36424 tree arg1 = CALL_EXPR_ARG (exp, 1);
36425 rtx op0 = expand_normal (arg0);
36426 rtx op1 = expand_normal (arg1);
36427 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36428 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36429 enum rtx_code comparison = d->comparison;
36431 if (VECTOR_MODE_P (mode0))
36432 op0 = safe_vector_operand (op0, mode0);
36433 if (VECTOR_MODE_P (mode1))
36434 op1 = safe_vector_operand (op1, mode1);
36436 /* Swap operands if we have a comparison that isn't available in
36437 hardware. */
36438 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36439 std::swap (op0, op1);
36441 target = gen_reg_rtx (SImode);
36442 emit_move_insn (target, const0_rtx);
36443 target = gen_rtx_SUBREG (QImode, target, 0);
36445 if ((optimize && !register_operand (op0, mode0))
36446 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36447 op0 = copy_to_mode_reg (mode0, op0);
36448 if ((optimize && !register_operand (op1, mode1))
36449 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36450 op1 = copy_to_mode_reg (mode1, op1);
36452 pat = GEN_FCN (d->icode) (op0, op1);
36453 if (! pat)
36454 return 0;
36455 emit_insn (pat);
36456 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36457 gen_rtx_fmt_ee (comparison, QImode,
36458 SET_DEST (pat),
36459 const0_rtx)));
36461 return SUBREG_REG (target);
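/* Usage sketch (illustrative, not part of the original source): the comi
   expansions back scalar ordered-compare intrinsics such as

     int r = _mm_comilt_ss (a, b);

   the comi/ucomi insn only sets the flags, and the STRICT_LOW_PART store
   above materializes the boolean result into the low byte of a zeroed
   SImode pseudo.  */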
36464 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36466 static rtx
36467 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36468 rtx target)
36470 rtx pat;
36471 tree arg0 = CALL_EXPR_ARG (exp, 0);
36472 rtx op1, op0 = expand_normal (arg0);
36473 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36474 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36476 if (optimize || target == 0
36477 || GET_MODE (target) != tmode
36478 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36479 target = gen_reg_rtx (tmode);
36481 if (VECTOR_MODE_P (mode0))
36482 op0 = safe_vector_operand (op0, mode0);
36484 if ((optimize && !register_operand (op0, mode0))
36485 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36486 op0 = copy_to_mode_reg (mode0, op0);
36488 op1 = GEN_INT (d->comparison);
36490 pat = GEN_FCN (d->icode) (target, op0, op1);
36491 if (! pat)
36492 return 0;
36493 emit_insn (pat);
36494 return target;
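/* Illustrative note (added, not in the original file): for the *_ROUND
   descriptors the rounding-mode immediate lives in d->comparison rather
   than in the call, so the expansion is effectively

     pat = GEN_FCN (d->icode) (target, op0, GEN_INT (d->comparison));

   e.g. a floor-style builtin carries ROUND_FLOOR in its descriptor.  */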
36497 static rtx
36498 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36499 tree exp, rtx target)
36501 rtx pat;
36502 tree arg0 = CALL_EXPR_ARG (exp, 0);
36503 tree arg1 = CALL_EXPR_ARG (exp, 1);
36504 rtx op0 = expand_normal (arg0);
36505 rtx op1 = expand_normal (arg1);
36506 rtx op2;
36507 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36508 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36509 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36511 if (optimize || target == 0
36512 || GET_MODE (target) != tmode
36513 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36514 target = gen_reg_rtx (tmode);
36516 op0 = safe_vector_operand (op0, mode0);
36517 op1 = safe_vector_operand (op1, mode1);
36519 if ((optimize && !register_operand (op0, mode0))
36520 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36521 op0 = copy_to_mode_reg (mode0, op0);
36522 if ((optimize && !register_operand (op1, mode1))
36523 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36524 op1 = copy_to_mode_reg (mode1, op1);
36526 op2 = GEN_INT (d->comparison);
36528 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36529 if (! pat)
36530 return 0;
36531 emit_insn (pat);
36532 return target;
36535 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36537 static rtx
36538 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36539 rtx target)
36541 rtx pat;
36542 tree arg0 = CALL_EXPR_ARG (exp, 0);
36543 tree arg1 = CALL_EXPR_ARG (exp, 1);
36544 rtx op0 = expand_normal (arg0);
36545 rtx op1 = expand_normal (arg1);
36546 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36547 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36548 enum rtx_code comparison = d->comparison;
36550 if (VECTOR_MODE_P (mode0))
36551 op0 = safe_vector_operand (op0, mode0);
36552 if (VECTOR_MODE_P (mode1))
36553 op1 = safe_vector_operand (op1, mode1);
36555 target = gen_reg_rtx (SImode);
36556 emit_move_insn (target, const0_rtx);
36557 target = gen_rtx_SUBREG (QImode, target, 0);
36559 if ((optimize && !register_operand (op0, mode0))
36560 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36561 op0 = copy_to_mode_reg (mode0, op0);
36562 if ((optimize && !register_operand (op1, mode1))
36563 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36564 op1 = copy_to_mode_reg (mode1, op1);
36566 pat = GEN_FCN (d->icode) (op0, op1);
36567 if (! pat)
36568 return 0;
36569 emit_insn (pat);
36570 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36571 gen_rtx_fmt_ee (comparison, QImode,
36572 SET_DEST (pat),
36573 const0_rtx)));
36575 return SUBREG_REG (target);
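/* Usage sketch (illustrative, not part of the original source): the ptest
   expansions back intrinsics such as

     int z = _mm_testz_si128 (a, b);

   the ptest insn only sets flags, so the result is recovered by comparing
   the flags destination with zero using the rtx code in d->comparison,
   via the STRICT_LOW_PART sequence above.  */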
36578 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36580 static rtx
36581 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36582 tree exp, rtx target)
36584 rtx pat;
36585 tree arg0 = CALL_EXPR_ARG (exp, 0);
36586 tree arg1 = CALL_EXPR_ARG (exp, 1);
36587 tree arg2 = CALL_EXPR_ARG (exp, 2);
36588 tree arg3 = CALL_EXPR_ARG (exp, 3);
36589 tree arg4 = CALL_EXPR_ARG (exp, 4);
36590 rtx scratch0, scratch1;
36591 rtx op0 = expand_normal (arg0);
36592 rtx op1 = expand_normal (arg1);
36593 rtx op2 = expand_normal (arg2);
36594 rtx op3 = expand_normal (arg3);
36595 rtx op4 = expand_normal (arg4);
36596 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36598 tmode0 = insn_data[d->icode].operand[0].mode;
36599 tmode1 = insn_data[d->icode].operand[1].mode;
36600 modev2 = insn_data[d->icode].operand[2].mode;
36601 modei3 = insn_data[d->icode].operand[3].mode;
36602 modev4 = insn_data[d->icode].operand[4].mode;
36603 modei5 = insn_data[d->icode].operand[5].mode;
36604 modeimm = insn_data[d->icode].operand[6].mode;
36606 if (VECTOR_MODE_P (modev2))
36607 op0 = safe_vector_operand (op0, modev2);
36608 if (VECTOR_MODE_P (modev4))
36609 op2 = safe_vector_operand (op2, modev4);
36611 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36612 op0 = copy_to_mode_reg (modev2, op0);
36613 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36614 op1 = copy_to_mode_reg (modei3, op1);
36615 if ((optimize && !register_operand (op2, modev4))
36616 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36617 op2 = copy_to_mode_reg (modev4, op2);
36618 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36619 op3 = copy_to_mode_reg (modei5, op3);
36621 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36623 error ("the fifth argument must be an 8-bit immediate");
36624 return const0_rtx;
36627 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36629 if (optimize || !target
36630 || GET_MODE (target) != tmode0
36631 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36632 target = gen_reg_rtx (tmode0);
36634 scratch1 = gen_reg_rtx (tmode1);
36636 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36638 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36640 if (optimize || !target
36641 || GET_MODE (target) != tmode1
36642 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36643 target = gen_reg_rtx (tmode1);
36645 scratch0 = gen_reg_rtx (tmode0);
36647 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36649 else
36651 gcc_assert (d->flag);
36653 scratch0 = gen_reg_rtx (tmode0);
36654 scratch1 = gen_reg_rtx (tmode1);
36656 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36659 if (! pat)
36660 return 0;
36662 emit_insn (pat);
36664 if (d->flag)
36666 target = gen_reg_rtx (SImode);
36667 emit_move_insn (target, const0_rtx);
36668 target = gen_rtx_SUBREG (QImode, target, 0);
36670 emit_insn
36671 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36672 gen_rtx_fmt_ee (EQ, QImode,
36673 gen_rtx_REG ((machine_mode) d->flag,
36674 FLAGS_REG),
36675 const0_rtx)));
36676 return SUBREG_REG (target);
36678 else
36679 return target;
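/* Illustrative note (added, not in the original file): every pcmpestr
   pattern produces both an index result and a mask result (plus flags);
   d->code selects which of those becomes the builtin's value, while the
   flag-only variants return a single condition-code bit by comparing
   FLAGS_REG, in the CC mode recorded in d->flag, against zero.  */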
36683 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36685 static rtx
36686 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36687 tree exp, rtx target)
36689 rtx pat;
36690 tree arg0 = CALL_EXPR_ARG (exp, 0);
36691 tree arg1 = CALL_EXPR_ARG (exp, 1);
36692 tree arg2 = CALL_EXPR_ARG (exp, 2);
36693 rtx scratch0, scratch1;
36694 rtx op0 = expand_normal (arg0);
36695 rtx op1 = expand_normal (arg1);
36696 rtx op2 = expand_normal (arg2);
36697 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36699 tmode0 = insn_data[d->icode].operand[0].mode;
36700 tmode1 = insn_data[d->icode].operand[1].mode;
36701 modev2 = insn_data[d->icode].operand[2].mode;
36702 modev3 = insn_data[d->icode].operand[3].mode;
36703 modeimm = insn_data[d->icode].operand[4].mode;
36705 if (VECTOR_MODE_P (modev2))
36706 op0 = safe_vector_operand (op0, modev2);
36707 if (VECTOR_MODE_P (modev3))
36708 op1 = safe_vector_operand (op1, modev3);
36710 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36711 op0 = copy_to_mode_reg (modev2, op0);
36712 if ((optimize && !register_operand (op1, modev3))
36713 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36714 op1 = copy_to_mode_reg (modev3, op1);
36716 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36718 error ("the third argument must be an 8-bit immediate");
36719 return const0_rtx;
36722 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36724 if (optimize || !target
36725 || GET_MODE (target) != tmode0
36726 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36727 target = gen_reg_rtx (tmode0);
36729 scratch1 = gen_reg_rtx (tmode1);
36731 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36733 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36735 if (optimize || !target
36736 || GET_MODE (target) != tmode1
36737 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36738 target = gen_reg_rtx (tmode1);
36740 scratch0 = gen_reg_rtx (tmode0);
36742 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36744 else
36746 gcc_assert (d->flag);
36748 scratch0 = gen_reg_rtx (tmode0);
36749 scratch1 = gen_reg_rtx (tmode1);
36751 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36754 if (! pat)
36755 return 0;
36757 emit_insn (pat);
36759 if (d->flag)
36761 target = gen_reg_rtx (SImode);
36762 emit_move_insn (target, const0_rtx);
36763 target = gen_rtx_SUBREG (QImode, target, 0);
36765 emit_insn
36766 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36767 gen_rtx_fmt_ee (EQ, QImode,
36768 gen_rtx_REG ((machine_mode) d->flag,
36769 FLAGS_REG),
36770 const0_rtx)));
36771 return SUBREG_REG (target);
36773 else
36774 return target;
36777 /* Subroutine of ix86_expand_builtin to take care of insns with
36778 a variable number of operands. */
36780 static rtx
36781 ix86_expand_args_builtin (const struct builtin_description *d,
36782 tree exp, rtx target)
36784 rtx pat, real_target;
36785 unsigned int i, nargs;
36786 unsigned int nargs_constant = 0;
36787 unsigned int mask_pos = 0;
36788 int num_memory = 0;
36789 struct
36791 rtx op;
36792 machine_mode mode;
36793 } args[6];
36794 bool last_arg_count = false;
36795 enum insn_code icode = d->icode;
36796 const struct insn_data_d *insn_p = &insn_data[icode];
36797 machine_mode tmode = insn_p->operand[0].mode;
36798 machine_mode rmode = VOIDmode;
36799 bool swap = false;
36800 enum rtx_code comparison = d->comparison;
36802 switch ((enum ix86_builtin_func_type) d->flag)
36804 case V2DF_FTYPE_V2DF_ROUND:
36805 case V4DF_FTYPE_V4DF_ROUND:
36806 case V4SF_FTYPE_V4SF_ROUND:
36807 case V8SF_FTYPE_V8SF_ROUND:
36808 case V4SI_FTYPE_V4SF_ROUND:
36809 case V8SI_FTYPE_V8SF_ROUND:
36810 return ix86_expand_sse_round (d, exp, target);
36811 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36812 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36813 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36814 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36815 case INT_FTYPE_V8SF_V8SF_PTEST:
36816 case INT_FTYPE_V4DI_V4DI_PTEST:
36817 case INT_FTYPE_V4DF_V4DF_PTEST:
36818 case INT_FTYPE_V4SF_V4SF_PTEST:
36819 case INT_FTYPE_V2DI_V2DI_PTEST:
36820 case INT_FTYPE_V2DF_V2DF_PTEST:
36821 return ix86_expand_sse_ptest (d, exp, target);
36822 case FLOAT128_FTYPE_FLOAT128:
36823 case FLOAT_FTYPE_FLOAT:
36824 case INT_FTYPE_INT:
36825 case UINT64_FTYPE_INT:
36826 case UINT16_FTYPE_UINT16:
36827 case INT64_FTYPE_INT64:
36828 case INT64_FTYPE_V4SF:
36829 case INT64_FTYPE_V2DF:
36830 case INT_FTYPE_V16QI:
36831 case INT_FTYPE_V8QI:
36832 case INT_FTYPE_V8SF:
36833 case INT_FTYPE_V4DF:
36834 case INT_FTYPE_V4SF:
36835 case INT_FTYPE_V2DF:
36836 case INT_FTYPE_V32QI:
36837 case V16QI_FTYPE_V16QI:
36838 case V8SI_FTYPE_V8SF:
36839 case V8SI_FTYPE_V4SI:
36840 case V8HI_FTYPE_V8HI:
36841 case V8HI_FTYPE_V16QI:
36842 case V8QI_FTYPE_V8QI:
36843 case V8SF_FTYPE_V8SF:
36844 case V8SF_FTYPE_V8SI:
36845 case V8SF_FTYPE_V4SF:
36846 case V8SF_FTYPE_V8HI:
36847 case V4SI_FTYPE_V4SI:
36848 case V4SI_FTYPE_V16QI:
36849 case V4SI_FTYPE_V4SF:
36850 case V4SI_FTYPE_V8SI:
36851 case V4SI_FTYPE_V8HI:
36852 case V4SI_FTYPE_V4DF:
36853 case V4SI_FTYPE_V2DF:
36854 case V4HI_FTYPE_V4HI:
36855 case V4DF_FTYPE_V4DF:
36856 case V4DF_FTYPE_V4SI:
36857 case V4DF_FTYPE_V4SF:
36858 case V4DF_FTYPE_V2DF:
36859 case V4SF_FTYPE_V4SF:
36860 case V4SF_FTYPE_V4SI:
36861 case V4SF_FTYPE_V8SF:
36862 case V4SF_FTYPE_V4DF:
36863 case V4SF_FTYPE_V8HI:
36864 case V4SF_FTYPE_V2DF:
36865 case V2DI_FTYPE_V2DI:
36866 case V2DI_FTYPE_V16QI:
36867 case V2DI_FTYPE_V8HI:
36868 case V2DI_FTYPE_V4SI:
36869 case V2DF_FTYPE_V2DF:
36870 case V2DF_FTYPE_V4SI:
36871 case V2DF_FTYPE_V4DF:
36872 case V2DF_FTYPE_V4SF:
36873 case V2DF_FTYPE_V2SI:
36874 case V2SI_FTYPE_V2SI:
36875 case V2SI_FTYPE_V4SF:
36876 case V2SI_FTYPE_V2SF:
36877 case V2SI_FTYPE_V2DF:
36878 case V2SF_FTYPE_V2SF:
36879 case V2SF_FTYPE_V2SI:
36880 case V32QI_FTYPE_V32QI:
36881 case V32QI_FTYPE_V16QI:
36882 case V16HI_FTYPE_V16HI:
36883 case V16HI_FTYPE_V8HI:
36884 case V8SI_FTYPE_V8SI:
36885 case V16HI_FTYPE_V16QI:
36886 case V8SI_FTYPE_V16QI:
36887 case V4DI_FTYPE_V16QI:
36888 case V8SI_FTYPE_V8HI:
36889 case V4DI_FTYPE_V8HI:
36890 case V4DI_FTYPE_V4SI:
36891 case V4DI_FTYPE_V2DI:
36892 case UHI_FTYPE_UHI:
36893 case UHI_FTYPE_V16QI:
36894 case USI_FTYPE_V32QI:
36895 case UDI_FTYPE_V64QI:
36896 case V16QI_FTYPE_UHI:
36897 case V32QI_FTYPE_USI:
36898 case V64QI_FTYPE_UDI:
36899 case V8HI_FTYPE_UQI:
36900 case V16HI_FTYPE_UHI:
36901 case V32HI_FTYPE_USI:
36902 case V4SI_FTYPE_UQI:
36903 case V8SI_FTYPE_UQI:
36904 case V4SI_FTYPE_UHI:
36905 case V8SI_FTYPE_UHI:
36906 case UQI_FTYPE_V8HI:
36907 case UHI_FTYPE_V16HI:
36908 case USI_FTYPE_V32HI:
36909 case UQI_FTYPE_V4SI:
36910 case UQI_FTYPE_V8SI:
36911 case UHI_FTYPE_V16SI:
36912 case UQI_FTYPE_V2DI:
36913 case UQI_FTYPE_V4DI:
36914 case UQI_FTYPE_V8DI:
36915 case V16SI_FTYPE_UHI:
36916 case V2DI_FTYPE_UQI:
36917 case V4DI_FTYPE_UQI:
36918 case V16SI_FTYPE_INT:
36919 case V16SF_FTYPE_V8SF:
36920 case V16SI_FTYPE_V8SI:
36921 case V16SF_FTYPE_V4SF:
36922 case V16SI_FTYPE_V4SI:
36923 case V16SF_FTYPE_V16SF:
36924 case V8DI_FTYPE_UQI:
36925 case V8DF_FTYPE_V4DF:
36926 case V8DF_FTYPE_V2DF:
36927 case V8DF_FTYPE_V8DF:
36928 nargs = 1;
36929 break;
36930 case V4SF_FTYPE_V4SF_VEC_MERGE:
36931 case V2DF_FTYPE_V2DF_VEC_MERGE:
36932 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36933 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36934 case V16QI_FTYPE_V16QI_V16QI:
36935 case V16QI_FTYPE_V8HI_V8HI:
36936 case V16SF_FTYPE_V16SF_V16SF:
36937 case V8QI_FTYPE_V8QI_V8QI:
36938 case V8QI_FTYPE_V4HI_V4HI:
36939 case V8HI_FTYPE_V8HI_V8HI:
36940 case V8HI_FTYPE_V16QI_V16QI:
36941 case V8HI_FTYPE_V4SI_V4SI:
36942 case V8SF_FTYPE_V8SF_V8SF:
36943 case V8SF_FTYPE_V8SF_V8SI:
36944 case V8DF_FTYPE_V8DF_V8DF:
36945 case V4SI_FTYPE_V4SI_V4SI:
36946 case V4SI_FTYPE_V8HI_V8HI:
36947 case V4SI_FTYPE_V2DF_V2DF:
36948 case V4HI_FTYPE_V4HI_V4HI:
36949 case V4HI_FTYPE_V8QI_V8QI:
36950 case V4HI_FTYPE_V2SI_V2SI:
36951 case V4DF_FTYPE_V4DF_V4DF:
36952 case V4DF_FTYPE_V4DF_V4DI:
36953 case V4SF_FTYPE_V4SF_V4SF:
36954 case V4SF_FTYPE_V4SF_V4SI:
36955 case V4SF_FTYPE_V4SF_V2SI:
36956 case V4SF_FTYPE_V4SF_V2DF:
36957 case V4SF_FTYPE_V4SF_UINT:
36958 case V4SF_FTYPE_V4SF_DI:
36959 case V4SF_FTYPE_V4SF_SI:
36960 case V2DI_FTYPE_V2DI_V2DI:
36961 case V2DI_FTYPE_V16QI_V16QI:
36962 case V2DI_FTYPE_V4SI_V4SI:
36963 case V2DI_FTYPE_V2DI_V16QI:
36964 case V2SI_FTYPE_V2SI_V2SI:
36965 case V2SI_FTYPE_V4HI_V4HI:
36966 case V2SI_FTYPE_V2SF_V2SF:
36967 case V2DF_FTYPE_V2DF_V2DF:
36968 case V2DF_FTYPE_V2DF_V4SF:
36969 case V2DF_FTYPE_V2DF_V2DI:
36970 case V2DF_FTYPE_V2DF_DI:
36971 case V2DF_FTYPE_V2DF_SI:
36972 case V2DF_FTYPE_V2DF_UINT:
36973 case V2SF_FTYPE_V2SF_V2SF:
36974 case V1DI_FTYPE_V1DI_V1DI:
36975 case V1DI_FTYPE_V8QI_V8QI:
36976 case V1DI_FTYPE_V2SI_V2SI:
36977 case V32QI_FTYPE_V16HI_V16HI:
36978 case V16HI_FTYPE_V8SI_V8SI:
36979 case V32QI_FTYPE_V32QI_V32QI:
36980 case V16HI_FTYPE_V32QI_V32QI:
36981 case V16HI_FTYPE_V16HI_V16HI:
36982 case V8SI_FTYPE_V4DF_V4DF:
36983 case V8SI_FTYPE_V8SI_V8SI:
36984 case V8SI_FTYPE_V16HI_V16HI:
36985 case V4DI_FTYPE_V4DI_V4DI:
36986 case V4DI_FTYPE_V8SI_V8SI:
36987 case V8DI_FTYPE_V64QI_V64QI:
36988 if (comparison == UNKNOWN)
36989 return ix86_expand_binop_builtin (icode, exp, target);
36990 nargs = 2;
36991 break;
36992 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36993 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36994 gcc_assert (comparison != UNKNOWN);
36995 nargs = 2;
36996 swap = true;
36997 break;
36998 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36999 case V16HI_FTYPE_V16HI_SI_COUNT:
37000 case V8SI_FTYPE_V8SI_V4SI_COUNT:
37001 case V8SI_FTYPE_V8SI_SI_COUNT:
37002 case V4DI_FTYPE_V4DI_V2DI_COUNT:
37003 case V4DI_FTYPE_V4DI_INT_COUNT:
37004 case V8HI_FTYPE_V8HI_V8HI_COUNT:
37005 case V8HI_FTYPE_V8HI_SI_COUNT:
37006 case V4SI_FTYPE_V4SI_V4SI_COUNT:
37007 case V4SI_FTYPE_V4SI_SI_COUNT:
37008 case V4HI_FTYPE_V4HI_V4HI_COUNT:
37009 case V4HI_FTYPE_V4HI_SI_COUNT:
37010 case V2DI_FTYPE_V2DI_V2DI_COUNT:
37011 case V2DI_FTYPE_V2DI_SI_COUNT:
37012 case V2SI_FTYPE_V2SI_V2SI_COUNT:
37013 case V2SI_FTYPE_V2SI_SI_COUNT:
37014 case V1DI_FTYPE_V1DI_V1DI_COUNT:
37015 case V1DI_FTYPE_V1DI_SI_COUNT:
37016 nargs = 2;
37017 last_arg_count = true;
37018 break;
37019 case UINT64_FTYPE_UINT64_UINT64:
37020 case UINT_FTYPE_UINT_UINT:
37021 case UINT_FTYPE_UINT_USHORT:
37022 case UINT_FTYPE_UINT_UCHAR:
37023 case UINT16_FTYPE_UINT16_INT:
37024 case UINT8_FTYPE_UINT8_INT:
37025 case UHI_FTYPE_UHI_UHI:
37026 case USI_FTYPE_USI_USI:
37027 case UDI_FTYPE_UDI_UDI:
37028 case V16SI_FTYPE_V8DF_V8DF:
37029 nargs = 2;
37030 break;
37031 case V2DI_FTYPE_V2DI_INT_CONVERT:
37032 nargs = 2;
37033 rmode = V1TImode;
37034 nargs_constant = 1;
37035 break;
37036 case V4DI_FTYPE_V4DI_INT_CONVERT:
37037 nargs = 2;
37038 rmode = V2TImode;
37039 nargs_constant = 1;
37040 break;
37041 case V8DI_FTYPE_V8DI_INT_CONVERT:
37042 nargs = 2;
37043 rmode = V4TImode;
37044 nargs_constant = 1;
37045 break;
37046 case V8HI_FTYPE_V8HI_INT:
37047 case V8HI_FTYPE_V8SF_INT:
37048 case V16HI_FTYPE_V16SF_INT:
37049 case V8HI_FTYPE_V4SF_INT:
37050 case V8SF_FTYPE_V8SF_INT:
37051 case V4SF_FTYPE_V16SF_INT:
37052 case V16SF_FTYPE_V16SF_INT:
37053 case V4SI_FTYPE_V4SI_INT:
37054 case V4SI_FTYPE_V8SI_INT:
37055 case V4HI_FTYPE_V4HI_INT:
37056 case V4DF_FTYPE_V4DF_INT:
37057 case V4DF_FTYPE_V8DF_INT:
37058 case V4SF_FTYPE_V4SF_INT:
37059 case V4SF_FTYPE_V8SF_INT:
37060 case V2DI_FTYPE_V2DI_INT:
37061 case V2DF_FTYPE_V2DF_INT:
37062 case V2DF_FTYPE_V4DF_INT:
37063 case V16HI_FTYPE_V16HI_INT:
37064 case V8SI_FTYPE_V8SI_INT:
37065 case V16SI_FTYPE_V16SI_INT:
37066 case V4SI_FTYPE_V16SI_INT:
37067 case V4DI_FTYPE_V4DI_INT:
37068 case V2DI_FTYPE_V4DI_INT:
37069 case V4DI_FTYPE_V8DI_INT:
37070 case QI_FTYPE_V4SF_INT:
37071 case QI_FTYPE_V2DF_INT:
37072 nargs = 2;
37073 nargs_constant = 1;
37074 break;
37075 case V16QI_FTYPE_V16QI_V16QI_V16QI:
37076 case V8SF_FTYPE_V8SF_V8SF_V8SF:
37077 case V4DF_FTYPE_V4DF_V4DF_V4DF:
37078 case V4SF_FTYPE_V4SF_V4SF_V4SF:
37079 case V2DF_FTYPE_V2DF_V2DF_V2DF:
37080 case V32QI_FTYPE_V32QI_V32QI_V32QI:
37081 case UHI_FTYPE_V16SI_V16SI_UHI:
37082 case UQI_FTYPE_V8DI_V8DI_UQI:
37083 case V16HI_FTYPE_V16SI_V16HI_UHI:
37084 case V16QI_FTYPE_V16SI_V16QI_UHI:
37085 case V16QI_FTYPE_V8DI_V16QI_UQI:
37086 case V16SF_FTYPE_V16SF_V16SF_UHI:
37087 case V16SF_FTYPE_V4SF_V16SF_UHI:
37088 case V16SI_FTYPE_SI_V16SI_UHI:
37089 case V16SI_FTYPE_V16HI_V16SI_UHI:
37090 case V16SI_FTYPE_V16QI_V16SI_UHI:
37091 case V8SF_FTYPE_V4SF_V8SF_UQI:
37092 case V4DF_FTYPE_V2DF_V4DF_UQI:
37093 case V8SI_FTYPE_V4SI_V8SI_UQI:
37094 case V8SI_FTYPE_SI_V8SI_UQI:
37095 case V4SI_FTYPE_V4SI_V4SI_UQI:
37096 case V4SI_FTYPE_SI_V4SI_UQI:
37097 case V4DI_FTYPE_V2DI_V4DI_UQI:
37098 case V4DI_FTYPE_DI_V4DI_UQI:
37099 case V2DI_FTYPE_V2DI_V2DI_UQI:
37100 case V2DI_FTYPE_DI_V2DI_UQI:
37101 case V64QI_FTYPE_V64QI_V64QI_UDI:
37102 case V64QI_FTYPE_V16QI_V64QI_UDI:
37103 case V64QI_FTYPE_QI_V64QI_UDI:
37104 case V32QI_FTYPE_V32QI_V32QI_USI:
37105 case V32QI_FTYPE_V16QI_V32QI_USI:
37106 case V32QI_FTYPE_QI_V32QI_USI:
37107 case V16QI_FTYPE_V16QI_V16QI_UHI:
37108 case V16QI_FTYPE_QI_V16QI_UHI:
37109 case V32HI_FTYPE_V8HI_V32HI_USI:
37110 case V32HI_FTYPE_HI_V32HI_USI:
37111 case V16HI_FTYPE_V8HI_V16HI_UHI:
37112 case V16HI_FTYPE_HI_V16HI_UHI:
37113 case V8HI_FTYPE_V8HI_V8HI_UQI:
37114 case V8HI_FTYPE_HI_V8HI_UQI:
37115 case V8SF_FTYPE_V8HI_V8SF_UQI:
37116 case V4SF_FTYPE_V8HI_V4SF_UQI:
37117 case V8SI_FTYPE_V8SF_V8SI_UQI:
37118 case V4SI_FTYPE_V4SF_V4SI_UQI:
37119 case V4DI_FTYPE_V4SF_V4DI_UQI:
37120 case V2DI_FTYPE_V4SF_V2DI_UQI:
37121 case V4SF_FTYPE_V4DI_V4SF_UQI:
37122 case V4SF_FTYPE_V2DI_V4SF_UQI:
37123 case V4DF_FTYPE_V4DI_V4DF_UQI:
37124 case V2DF_FTYPE_V2DI_V2DF_UQI:
37125 case V16QI_FTYPE_V8HI_V16QI_UQI:
37126 case V16QI_FTYPE_V16HI_V16QI_UHI:
37127 case V16QI_FTYPE_V4SI_V16QI_UQI:
37128 case V16QI_FTYPE_V8SI_V16QI_UQI:
37129 case V8HI_FTYPE_V4SI_V8HI_UQI:
37130 case V8HI_FTYPE_V8SI_V8HI_UQI:
37131 case V16QI_FTYPE_V2DI_V16QI_UQI:
37132 case V16QI_FTYPE_V4DI_V16QI_UQI:
37133 case V8HI_FTYPE_V2DI_V8HI_UQI:
37134 case V8HI_FTYPE_V4DI_V8HI_UQI:
37135 case V4SI_FTYPE_V2DI_V4SI_UQI:
37136 case V4SI_FTYPE_V4DI_V4SI_UQI:
37137 case V32QI_FTYPE_V32HI_V32QI_USI:
37138 case UHI_FTYPE_V16QI_V16QI_UHI:
37139 case USI_FTYPE_V32QI_V32QI_USI:
37140 case UDI_FTYPE_V64QI_V64QI_UDI:
37141 case UQI_FTYPE_V8HI_V8HI_UQI:
37142 case UHI_FTYPE_V16HI_V16HI_UHI:
37143 case USI_FTYPE_V32HI_V32HI_USI:
37144 case UQI_FTYPE_V4SI_V4SI_UQI:
37145 case UQI_FTYPE_V8SI_V8SI_UQI:
37146 case UQI_FTYPE_V2DI_V2DI_UQI:
37147 case UQI_FTYPE_V4DI_V4DI_UQI:
37148 case V4SF_FTYPE_V2DF_V4SF_UQI:
37149 case V4SF_FTYPE_V4DF_V4SF_UQI:
37150 case V16SI_FTYPE_V16SI_V16SI_UHI:
37151 case V16SI_FTYPE_V4SI_V16SI_UHI:
37152 case V2DI_FTYPE_V4SI_V2DI_UQI:
37153 case V2DI_FTYPE_V8HI_V2DI_UQI:
37154 case V2DI_FTYPE_V16QI_V2DI_UQI:
37155 case V4DI_FTYPE_V4DI_V4DI_UQI:
37156 case V4DI_FTYPE_V4SI_V4DI_UQI:
37157 case V4DI_FTYPE_V8HI_V4DI_UQI:
37158 case V4DI_FTYPE_V16QI_V4DI_UQI:
37159 case V4DI_FTYPE_V4DF_V4DI_UQI:
37160 case V2DI_FTYPE_V2DF_V2DI_UQI:
37161 case V4SI_FTYPE_V4DF_V4SI_UQI:
37162 case V4SI_FTYPE_V2DF_V4SI_UQI:
37163 case V4SI_FTYPE_V8HI_V4SI_UQI:
37164 case V4SI_FTYPE_V16QI_V4SI_UQI:
37165 case V4DI_FTYPE_V4DI_V4DI_V4DI:
37166 case V8DF_FTYPE_V2DF_V8DF_UQI:
37167 case V8DF_FTYPE_V4DF_V8DF_UQI:
37168 case V8DF_FTYPE_V8DF_V8DF_UQI:
37169 case V8SF_FTYPE_V8SF_V8SF_UQI:
37170 case V8SF_FTYPE_V8SI_V8SF_UQI:
37171 case V4DF_FTYPE_V4DF_V4DF_UQI:
37172 case V4SF_FTYPE_V4SF_V4SF_UQI:
37173 case V2DF_FTYPE_V2DF_V2DF_UQI:
37174 case V2DF_FTYPE_V4SF_V2DF_UQI:
37175 case V2DF_FTYPE_V4SI_V2DF_UQI:
37176 case V4SF_FTYPE_V4SI_V4SF_UQI:
37177 case V4DF_FTYPE_V4SF_V4DF_UQI:
37178 case V4DF_FTYPE_V4SI_V4DF_UQI:
37179 case V8SI_FTYPE_V8SI_V8SI_UQI:
37180 case V8SI_FTYPE_V8HI_V8SI_UQI:
37181 case V8SI_FTYPE_V16QI_V8SI_UQI:
37182 case V8DF_FTYPE_V8SI_V8DF_UQI:
37183 case V8DI_FTYPE_DI_V8DI_UQI:
37184 case V16SF_FTYPE_V8SF_V16SF_UHI:
37185 case V16SI_FTYPE_V8SI_V16SI_UHI:
37186 case V16HI_FTYPE_V16HI_V16HI_UHI:
37187 case V8HI_FTYPE_V16QI_V8HI_UQI:
37188 case V16HI_FTYPE_V16QI_V16HI_UHI:
37189 case V32HI_FTYPE_V32HI_V32HI_USI:
37190 case V32HI_FTYPE_V32QI_V32HI_USI:
37191 case V8DI_FTYPE_V16QI_V8DI_UQI:
37192 case V8DI_FTYPE_V2DI_V8DI_UQI:
37193 case V8DI_FTYPE_V4DI_V8DI_UQI:
37194 case V8DI_FTYPE_V8DI_V8DI_UQI:
37195 case V8DI_FTYPE_V8HI_V8DI_UQI:
37196 case V8DI_FTYPE_V8SI_V8DI_UQI:
37197 case V8HI_FTYPE_V8DI_V8HI_UQI:
37198 case V8SI_FTYPE_V8DI_V8SI_UQI:
37199 case V4SI_FTYPE_V4SI_V4SI_V4SI:
37200 nargs = 3;
37201 break;
37202 case V32QI_FTYPE_V32QI_V32QI_INT:
37203 case V16HI_FTYPE_V16HI_V16HI_INT:
37204 case V16QI_FTYPE_V16QI_V16QI_INT:
37205 case V4DI_FTYPE_V4DI_V4DI_INT:
37206 case V8HI_FTYPE_V8HI_V8HI_INT:
37207 case V8SI_FTYPE_V8SI_V8SI_INT:
37208 case V8SI_FTYPE_V8SI_V4SI_INT:
37209 case V8SF_FTYPE_V8SF_V8SF_INT:
37210 case V8SF_FTYPE_V8SF_V4SF_INT:
37211 case V4SI_FTYPE_V4SI_V4SI_INT:
37212 case V4DF_FTYPE_V4DF_V4DF_INT:
37213 case V16SF_FTYPE_V16SF_V16SF_INT:
37214 case V16SF_FTYPE_V16SF_V4SF_INT:
37215 case V16SI_FTYPE_V16SI_V4SI_INT:
37216 case V4DF_FTYPE_V4DF_V2DF_INT:
37217 case V4SF_FTYPE_V4SF_V4SF_INT:
37218 case V2DI_FTYPE_V2DI_V2DI_INT:
37219 case V4DI_FTYPE_V4DI_V2DI_INT:
37220 case V2DF_FTYPE_V2DF_V2DF_INT:
37221 case UQI_FTYPE_V8DI_V8UDI_INT:
37222 case UQI_FTYPE_V8DF_V8DF_INT:
37223 case UQI_FTYPE_V2DF_V2DF_INT:
37224 case UQI_FTYPE_V4SF_V4SF_INT:
37225 case UHI_FTYPE_V16SI_V16SI_INT:
37226 case UHI_FTYPE_V16SF_V16SF_INT:
37227 nargs = 3;
37228 nargs_constant = 1;
37229 break;
37230 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37231 nargs = 3;
37232 rmode = V4DImode;
37233 nargs_constant = 1;
37234 break;
37235 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37236 nargs = 3;
37237 rmode = V2DImode;
37238 nargs_constant = 1;
37239 break;
37240 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37241 nargs = 3;
37242 rmode = DImode;
37243 nargs_constant = 1;
37244 break;
37245 case V2DI_FTYPE_V2DI_UINT_UINT:
37246 nargs = 3;
37247 nargs_constant = 2;
37248 break;
37249 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37250 nargs = 3;
37251 rmode = V8DImode;
37252 nargs_constant = 1;
37253 break;
37254 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT:
37255 nargs = 5;
37256 rmode = V8DImode;
37257 mask_pos = 2;
37258 nargs_constant = 1;
37259 break;
37260 case QI_FTYPE_V8DF_INT_UQI:
37261 case QI_FTYPE_V4DF_INT_UQI:
37262 case QI_FTYPE_V2DF_INT_UQI:
37263 case HI_FTYPE_V16SF_INT_UHI:
37264 case QI_FTYPE_V8SF_INT_UQI:
37265 case QI_FTYPE_V4SF_INT_UQI:
37266 nargs = 3;
37267 mask_pos = 1;
37268 nargs_constant = 1;
37269 break;
37270 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT:
37271 nargs = 5;
37272 rmode = V4DImode;
37273 mask_pos = 2;
37274 nargs_constant = 1;
37275 break;
37276 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT:
37277 nargs = 5;
37278 rmode = V2DImode;
37279 mask_pos = 2;
37280 nargs_constant = 1;
37281 break;
37282 case V32QI_FTYPE_V32QI_V32QI_V32QI_USI:
37283 case V32HI_FTYPE_V32HI_V32HI_V32HI_USI:
37284 case V32HI_FTYPE_V64QI_V64QI_V32HI_USI:
37285 case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI:
37286 case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI:
37287 case V32HI_FTYPE_V32HI_V8HI_V32HI_USI:
37288 case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI:
37289 case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI:
37290 case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI:
37291 case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI:
37292 case V32QI_FTYPE_V16HI_V16HI_V32QI_USI:
37293 case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI:
37294 case V32HI_FTYPE_V16SI_V16SI_V32HI_USI:
37295 case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI:
37296 case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI:
37297 case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI:
37298 case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI:
37299 case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI:
37300 case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI:
37301 case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI:
37302 case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI:
37303 case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI:
37304 case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI:
37305 case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI:
37306 case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI:
37307 case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI:
37308 case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI:
37309 case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI:
37310 case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI:
37311 case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI:
37312 case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI:
37313 case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI:
37314 case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI:
37315 case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI:
37316 case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI:
37317 case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI:
37318 case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI:
37319 case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI:
37320 case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI:
37321 case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI:
37322 case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI:
37323 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI:
37324 case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI:
37325 case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI:
37326 case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI:
37327 case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI:
37328 case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI:
37329 case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI:
37330 case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
37331 case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
37332 case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
37333 nargs = 4;
37334 break;
37335 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37336 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37337 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37338 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37339 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37340 nargs = 4;
37341 nargs_constant = 1;
37342 break;
37343 case UQI_FTYPE_V4DI_V4DI_INT_UQI:
37344 case UQI_FTYPE_V8SI_V8SI_INT_UQI:
37345 case QI_FTYPE_V4DF_V4DF_INT_UQI:
37346 case QI_FTYPE_V8SF_V8SF_INT_UQI:
37347 case UQI_FTYPE_V2DI_V2DI_INT_UQI:
37348 case UQI_FTYPE_V4SI_V4SI_INT_UQI:
37349 case UQI_FTYPE_V2DF_V2DF_INT_UQI:
37350 case UQI_FTYPE_V4SF_V4SF_INT_UQI:
37351 case UDI_FTYPE_V64QI_V64QI_INT_UDI:
37352 case USI_FTYPE_V32QI_V32QI_INT_USI:
37353 case UHI_FTYPE_V16QI_V16QI_INT_UHI:
37354 case USI_FTYPE_V32HI_V32HI_INT_USI:
37355 case UHI_FTYPE_V16HI_V16HI_INT_UHI:
37356 case UQI_FTYPE_V8HI_V8HI_INT_UQI:
37357 nargs = 4;
37358 mask_pos = 1;
37359 nargs_constant = 1;
37360 break;
37361 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37362 nargs = 4;
37363 nargs_constant = 2;
37364 break;
37365 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37366 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37367 nargs = 4;
37368 break;
37369 case UQI_FTYPE_V8DI_V8DI_INT_UQI:
37370 case UHI_FTYPE_V16SI_V16SI_INT_UHI:
37371 mask_pos = 1;
37372 nargs = 4;
37373 nargs_constant = 1;
37374 break;
37375 case V8SF_FTYPE_V8SF_INT_V8SF_UQI:
37376 case V4SF_FTYPE_V4SF_INT_V4SF_UQI:
37377 case V2DF_FTYPE_V4DF_INT_V2DF_UQI:
37378 case V2DI_FTYPE_V4DI_INT_V2DI_UQI:
37379 case V8SF_FTYPE_V16SF_INT_V8SF_UQI:
37380 case V8SI_FTYPE_V16SI_INT_V8SI_UQI:
37381 case V2DF_FTYPE_V8DF_INT_V2DF_UQI:
37382 case V2DI_FTYPE_V8DI_INT_V2DI_UQI:
37383 case V4SF_FTYPE_V8SF_INT_V4SF_UQI:
37384 case V4SI_FTYPE_V8SI_INT_V4SI_UQI:
37385 case V8HI_FTYPE_V8SF_INT_V8HI_UQI:
37386 case V8HI_FTYPE_V4SF_INT_V8HI_UQI:
37387 case V32HI_FTYPE_V32HI_INT_V32HI_USI:
37388 case V16HI_FTYPE_V16HI_INT_V16HI_UHI:
37389 case V8HI_FTYPE_V8HI_INT_V8HI_UQI:
37390 case V4DI_FTYPE_V4DI_INT_V4DI_UQI:
37391 case V2DI_FTYPE_V2DI_INT_V2DI_UQI:
37392 case V8SI_FTYPE_V8SI_INT_V8SI_UQI:
37393 case V4SI_FTYPE_V4SI_INT_V4SI_UQI:
37394 case V4DF_FTYPE_V4DF_INT_V4DF_UQI:
37395 case V2DF_FTYPE_V2DF_INT_V2DF_UQI:
37396 case V8DF_FTYPE_V8DF_INT_V8DF_UQI:
37397 case V16SF_FTYPE_V16SF_INT_V16SF_UHI:
37398 case V16HI_FTYPE_V16SF_INT_V16HI_UHI:
37399 case V16SI_FTYPE_V16SI_INT_V16SI_UHI:
37400 case V4SI_FTYPE_V16SI_INT_V4SI_UQI:
37401 case V4DI_FTYPE_V8DI_INT_V4DI_UQI:
37402 case V4DF_FTYPE_V8DF_INT_V4DF_UQI:
37403 case V4SF_FTYPE_V16SF_INT_V4SF_UQI:
37404 case V8DI_FTYPE_V8DI_INT_V8DI_UQI:
37405 nargs = 4;
37406 mask_pos = 2;
37407 nargs_constant = 1;
37408 break;
37409 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI:
37410 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI:
37411 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI:
37412 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI:
37413 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI:
37414 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI:
37415 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI:
37416 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI:
37417 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI:
37418 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI:
37419 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI:
37420 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI:
37421 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI:
37422 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI:
37423 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI:
37424 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI:
37425 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI:
37426 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI:
37427 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI:
37428 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI:
37429 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI:
37430 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI:
37431 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI:
37432 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI:
37433 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI:
37434 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI:
37435 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI:
37436 nargs = 5;
37437 mask_pos = 2;
37438 nargs_constant = 1;
37439 break;
37440 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI:
37441 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI:
37442 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI:
37443 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI:
37444 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI:
37445 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI:
37446 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI:
37447 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI:
37448 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI:
37449 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI:
37450 nargs = 5;
37452 mask_pos = 1;
37453 nargs_constant = 1;
37454 break;
37456 default:
37457 gcc_unreachable ();
37460 gcc_assert (nargs <= ARRAY_SIZE (args));
37462 if (comparison != UNKNOWN)
37464 gcc_assert (nargs == 2);
37465 return ix86_expand_sse_compare (d, exp, target, swap);
37468 if (rmode == VOIDmode || rmode == tmode)
37470 if (optimize
37471 || target == 0
37472 || GET_MODE (target) != tmode
37473 || !insn_p->operand[0].predicate (target, tmode))
37474 target = gen_reg_rtx (tmode);
37475 real_target = target;
37477 else
37479 real_target = gen_reg_rtx (tmode);
37480 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37483 for (i = 0; i < nargs; i++)
37485 tree arg = CALL_EXPR_ARG (exp, i);
37486 rtx op = expand_normal (arg);
37487 machine_mode mode = insn_p->operand[i + 1].mode;
37488 bool match = insn_p->operand[i + 1].predicate (op, mode);
37490 if (last_arg_count && (i + 1) == nargs)
37492 /* SIMD shift insns take either an 8-bit immediate or a
37493 register as the count, but the builtin functions take an int
37494 as the count.  If the count doesn't match, we put it in a register. */
37495 if (!match)
37497 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37498 if (!insn_p->operand[i + 1].predicate (op, mode))
37499 op = copy_to_reg (op);
37502 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37503 (!mask_pos && (nargs - i) <= nargs_constant))
37505 if (!match)
37506 switch (icode)
37508 case CODE_FOR_avx_vinsertf128v4di:
37509 case CODE_FOR_avx_vextractf128v4di:
37510 error ("the last argument must be a 1-bit immediate");
37511 return const0_rtx;
37513 case CODE_FOR_avx512f_cmpv8di3_mask:
37514 case CODE_FOR_avx512f_cmpv16si3_mask:
37515 case CODE_FOR_avx512f_ucmpv8di3_mask:
37516 case CODE_FOR_avx512f_ucmpv16si3_mask:
37517 case CODE_FOR_avx512vl_cmpv4di3_mask:
37518 case CODE_FOR_avx512vl_cmpv8si3_mask:
37519 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37520 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37521 case CODE_FOR_avx512vl_cmpv2di3_mask:
37522 case CODE_FOR_avx512vl_cmpv4si3_mask:
37523 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37524 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37525 error ("the last argument must be a 3-bit immediate");
37526 return const0_rtx;
37528 case CODE_FOR_sse4_1_roundsd:
37529 case CODE_FOR_sse4_1_roundss:
37531 case CODE_FOR_sse4_1_roundpd:
37532 case CODE_FOR_sse4_1_roundps:
37533 case CODE_FOR_avx_roundpd256:
37534 case CODE_FOR_avx_roundps256:
37536 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37537 case CODE_FOR_sse4_1_roundps_sfix:
37538 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37539 case CODE_FOR_avx_roundps_sfix256:
37541 case CODE_FOR_sse4_1_blendps:
37542 case CODE_FOR_avx_blendpd256:
37543 case CODE_FOR_avx_vpermilv4df:
37544 case CODE_FOR_avx_vpermilv4df_mask:
37545 case CODE_FOR_avx512f_getmantv8df_mask:
37546 case CODE_FOR_avx512f_getmantv16sf_mask:
37547 case CODE_FOR_avx512vl_getmantv8sf_mask:
37548 case CODE_FOR_avx512vl_getmantv4df_mask:
37549 case CODE_FOR_avx512vl_getmantv4sf_mask:
37550 case CODE_FOR_avx512vl_getmantv2df_mask:
37551 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37552 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37553 case CODE_FOR_avx512dq_rangepv4df_mask:
37554 case CODE_FOR_avx512dq_rangepv8sf_mask:
37555 case CODE_FOR_avx512dq_rangepv2df_mask:
37556 case CODE_FOR_avx512dq_rangepv4sf_mask:
37557 case CODE_FOR_avx_shufpd256_mask:
37558 error ("the last argument must be a 4-bit immediate");
37559 return const0_rtx;
37561 case CODE_FOR_sha1rnds4:
37562 case CODE_FOR_sse4_1_blendpd:
37563 case CODE_FOR_avx_vpermilv2df:
37564 case CODE_FOR_avx_vpermilv2df_mask:
37565 case CODE_FOR_xop_vpermil2v2df3:
37566 case CODE_FOR_xop_vpermil2v4sf3:
37567 case CODE_FOR_xop_vpermil2v4df3:
37568 case CODE_FOR_xop_vpermil2v8sf3:
37569 case CODE_FOR_avx512f_vinsertf32x4_mask:
37570 case CODE_FOR_avx512f_vinserti32x4_mask:
37571 case CODE_FOR_avx512f_vextractf32x4_mask:
37572 case CODE_FOR_avx512f_vextracti32x4_mask:
37573 case CODE_FOR_sse2_shufpd:
37574 case CODE_FOR_sse2_shufpd_mask:
37575 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37576 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37577 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37578 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37579 error ("the last argument must be a 2-bit immediate");
37580 return const0_rtx;
37582 case CODE_FOR_avx_vextractf128v4df:
37583 case CODE_FOR_avx_vextractf128v8sf:
37584 case CODE_FOR_avx_vextractf128v8si:
37585 case CODE_FOR_avx_vinsertf128v4df:
37586 case CODE_FOR_avx_vinsertf128v8sf:
37587 case CODE_FOR_avx_vinsertf128v8si:
37588 case CODE_FOR_avx512f_vinsertf64x4_mask:
37589 case CODE_FOR_avx512f_vinserti64x4_mask:
37590 case CODE_FOR_avx512f_vextractf64x4_mask:
37591 case CODE_FOR_avx512f_vextracti64x4_mask:
37592 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37593 case CODE_FOR_avx512dq_vinserti32x8_mask:
37594 case CODE_FOR_avx512vl_vinsertv4df:
37595 case CODE_FOR_avx512vl_vinsertv4di:
37596 case CODE_FOR_avx512vl_vinsertv8sf:
37597 case CODE_FOR_avx512vl_vinsertv8si:
37598 error ("the last argument must be a 1-bit immediate");
37599 return const0_rtx;
37601 case CODE_FOR_avx_vmcmpv2df3:
37602 case CODE_FOR_avx_vmcmpv4sf3:
37603 case CODE_FOR_avx_cmpv2df3:
37604 case CODE_FOR_avx_cmpv4sf3:
37605 case CODE_FOR_avx_cmpv4df3:
37606 case CODE_FOR_avx_cmpv8sf3:
37607 case CODE_FOR_avx512f_cmpv8df3_mask:
37608 case CODE_FOR_avx512f_cmpv16sf3_mask:
37609 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37610 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37611 error ("the last argument must be a 5-bit immediate");
37612 return const0_rtx;
37614 default:
37615 switch (nargs_constant)
37617 case 2:
37618 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37619 (!mask_pos && (nargs - i) == nargs_constant))
37621 error ("the next to last argument must be an 8-bit immediate");
37622 break;
37624 case 1:
37625 error ("the last argument must be an 8-bit immediate");
37626 break;
37627 default:
37628 gcc_unreachable ();
37630 return const0_rtx;
37633 else
37635 if (VECTOR_MODE_P (mode))
37636 op = safe_vector_operand (op, mode);
37638 /* If we aren't optimizing, only allow one memory operand to
37639 be generated. */
37640 if (memory_operand (op, mode))
37641 num_memory++;
37643 op = fixup_modeless_constant (op, mode);
37645 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37647 if (optimize || !match || num_memory > 1)
37648 op = copy_to_mode_reg (mode, op);
37650 else
37652 op = copy_to_reg (op);
37653 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37657 args[i].op = op;
37658 args[i].mode = mode;
37661 switch (nargs)
37663 case 1:
37664 pat = GEN_FCN (icode) (real_target, args[0].op);
37665 break;
37666 case 2:
37667 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37668 break;
37669 case 3:
37670 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37671 args[2].op);
37672 break;
37673 case 4:
37674 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37675 args[2].op, args[3].op);
37676 break;
37677 case 5:
37678 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37679 args[2].op, args[3].op, args[4].op);
      break;
37680 case 6:
37681 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37682 args[2].op, args[3].op, args[4].op,
37683 args[5].op);
37684 break;
37685 default:
37686 gcc_unreachable ();
37689 if (! pat)
37690 return 0;
37692 emit_insn (pat);
37693 return target;
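/* Illustrative note (added, not in the original file): the descriptor type
   fixes nargs, nargs_constant and mask_pos, and the operand loop then
   enforces them; e.g. a V8SF_FTYPE_V8SF_V8SF_INT builtin has nargs == 3
   with nargs_constant == 1, so its last argument must satisfy the insn's
   immediate predicate or one of the per-icode error messages above is
   emitted.  */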
37696 /* Transform a pattern of the following layout:
37697 (parallel [
37698 set (A B)
37699 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37701 into:
37702 (set (A B)),
      and likewise a pattern of the layout:
37705 (parallel [ A B
      ...
37707 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
      ... ])
37710 into:
37711 (parallel [ A B ... ]) */
37713 static rtx
37714 ix86_erase_embedded_rounding (rtx pat)
37716 if (GET_CODE (pat) == INSN)
37717 pat = PATTERN (pat);
37719 gcc_assert (GET_CODE (pat) == PARALLEL);
37721 if (XVECLEN (pat, 0) == 2)
37723 rtx p0 = XVECEXP (pat, 0, 0);
37724 rtx p1 = XVECEXP (pat, 0, 1);
37726 gcc_assert (GET_CODE (p0) == SET
37727 && GET_CODE (p1) == UNSPEC
37728 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37730 return p0;
37732 else
37734 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37735 int i = 0;
37736 int j = 0;
37738 for (; i < XVECLEN (pat, 0); ++i)
37740 rtx elem = XVECEXP (pat, 0, i);
37741 if (GET_CODE (elem) != UNSPEC
37742 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37743 res [j++] = elem;
37746 /* No more than 1 occurrence was removed. */
37747 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37749 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37753 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37754 with rounding. */
37755 static rtx
37756 ix86_expand_sse_comi_round (const struct builtin_description *d,
37757 tree exp, rtx target)
37759 rtx pat, set_dst;
37760 tree arg0 = CALL_EXPR_ARG (exp, 0);
37761 tree arg1 = CALL_EXPR_ARG (exp, 1);
37762 tree arg2 = CALL_EXPR_ARG (exp, 2);
37763 tree arg3 = CALL_EXPR_ARG (exp, 3);
37764 rtx op0 = expand_normal (arg0);
37765 rtx op1 = expand_normal (arg1);
37766 rtx op2 = expand_normal (arg2);
37767 rtx op3 = expand_normal (arg3);
37768 enum insn_code icode = d->icode;
37769 const struct insn_data_d *insn_p = &insn_data[icode];
37770 machine_mode mode0 = insn_p->operand[0].mode;
37771 machine_mode mode1 = insn_p->operand[1].mode;
37772 enum rtx_code comparison = UNEQ;
37773 bool need_ucomi = false;
37775 /* See avxintrin.h for values. */
37776 enum rtx_code comi_comparisons[32] =
37778 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37779 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37780 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37782 bool need_ucomi_values[32] =
37784 true, false, false, true, true, false, false, true,
37785 true, false, false, true, true, false, false, true,
37786 false, true, true, false, false, true, true, false,
37787 false, true, true, false, false, true, true, false
37790 if (!CONST_INT_P (op2))
37792 error ("the third argument must be a comparison constant");
37793 return const0_rtx;
37795 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37797 error ("incorrect comparison mode");
37798 return const0_rtx;
37801 if (!insn_p->operand[2].predicate (op3, SImode))
37803 error ("incorrect rounding operand");
37804 return const0_rtx;
37807 comparison = comi_comparisons[INTVAL (op2)];
37808 need_ucomi = need_ucomi_values[INTVAL (op2)];
37810 if (VECTOR_MODE_P (mode0))
37811 op0 = safe_vector_operand (op0, mode0);
37812 if (VECTOR_MODE_P (mode1))
37813 op1 = safe_vector_operand (op1, mode1);
37815 target = gen_reg_rtx (SImode);
37816 emit_move_insn (target, const0_rtx);
37817 target = gen_rtx_SUBREG (QImode, target, 0);
37819 if ((optimize && !register_operand (op0, mode0))
37820 || !insn_p->operand[0].predicate (op0, mode0))
37821 op0 = copy_to_mode_reg (mode0, op0);
37822 if ((optimize && !register_operand (op1, mode1))
37823 || !insn_p->operand[1].predicate (op1, mode1))
37824 op1 = copy_to_mode_reg (mode1, op1);
37826 if (need_ucomi)
37827 icode = icode == CODE_FOR_sse_comi_round
37828 ? CODE_FOR_sse_ucomi_round
37829 : CODE_FOR_sse2_ucomi_round;
37831 pat = GEN_FCN (icode) (op0, op1, op3);
37832 if (! pat)
37833 return 0;
37835 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37836 if (INTVAL (op3) == NO_ROUND)
37838 pat = ix86_erase_embedded_rounding (pat);
37839 if (! pat)
37840 return 0;
37842 set_dst = SET_DEST (pat);
37844 else
37846 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37847 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37850 emit_insn (pat);
37851 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37852 gen_rtx_fmt_ee (comparison, QImode,
37853 set_dst,
37854 const0_rtx)));
37856 return SUBREG_REG (target);
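/* Illustrative note (added, not in the original file): the two tables above
   map the 0..31 _CMP_* predicate immediate onto an rtx comparison code and
   a comi-vs-ucomi choice, so the quiet (non-signalling) predicates select
   the ucomi form of the rounding insn while the signalling ones keep
   comi.  */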
37859 static rtx
37860 ix86_expand_round_builtin (const struct builtin_description *d,
37861 tree exp, rtx target)
37863 rtx pat;
37864 unsigned int i, nargs;
37865 struct
37867 rtx op;
37868 machine_mode mode;
37869 } args[6];
37870 enum insn_code icode = d->icode;
37871 const struct insn_data_d *insn_p = &insn_data[icode];
37872 machine_mode tmode = insn_p->operand[0].mode;
37873 unsigned int nargs_constant = 0;
37874 unsigned int redundant_embed_rnd = 0;
37876 switch ((enum ix86_builtin_func_type) d->flag)
37878 case UINT64_FTYPE_V2DF_INT:
37879 case UINT64_FTYPE_V4SF_INT:
37880 case UINT_FTYPE_V2DF_INT:
37881 case UINT_FTYPE_V4SF_INT:
37882 case INT64_FTYPE_V2DF_INT:
37883 case INT64_FTYPE_V4SF_INT:
37884 case INT_FTYPE_V2DF_INT:
37885 case INT_FTYPE_V4SF_INT:
37886 nargs = 2;
37887 break;
37888 case V4SF_FTYPE_V4SF_UINT_INT:
37889 case V4SF_FTYPE_V4SF_UINT64_INT:
37890 case V2DF_FTYPE_V2DF_UINT64_INT:
37891 case V4SF_FTYPE_V4SF_INT_INT:
37892 case V4SF_FTYPE_V4SF_INT64_INT:
37893 case V2DF_FTYPE_V2DF_INT64_INT:
37894 case V4SF_FTYPE_V4SF_V4SF_INT:
37895 case V2DF_FTYPE_V2DF_V2DF_INT:
37896 case V4SF_FTYPE_V4SF_V2DF_INT:
37897 case V2DF_FTYPE_V2DF_V4SF_INT:
37898 nargs = 3;
37899 break;
37900 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37901 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37902 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37903 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37904 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37905 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37906 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37907 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37908 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37909 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37910 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37911 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37912 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37913 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37914 nargs = 4;
37915 break;
37916 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37917 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37918 nargs_constant = 2;
37919 nargs = 4;
37920 break;
37921 case INT_FTYPE_V4SF_V4SF_INT_INT:
37922 case INT_FTYPE_V2DF_V2DF_INT_INT:
37923 return ix86_expand_sse_comi_round (d, exp, target);
37924 case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT:
37925 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37926 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37927 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37928 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37929 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37930 nargs = 5;
37931 break;
37932 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37933 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37934 nargs_constant = 4;
37935 nargs = 5;
37936 break;
37937 case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT:
37938 case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT:
37939 case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT:
37940 case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT:
37941 nargs_constant = 3;
37942 nargs = 5;
37943 break;
37944 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37945 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37946 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37947 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37948 nargs = 6;
37949 nargs_constant = 4;
37950 break;
37951 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37952 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37953 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37954 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37955 nargs = 6;
37956 nargs_constant = 3;
37957 break;
37958 default:
37959 gcc_unreachable ();
37961 gcc_assert (nargs <= ARRAY_SIZE (args));
37963 if (optimize
37964 || target == 0
37965 || GET_MODE (target) != tmode
37966 || !insn_p->operand[0].predicate (target, tmode))
37967 target = gen_reg_rtx (tmode);
37969 for (i = 0; i < nargs; i++)
37971 tree arg = CALL_EXPR_ARG (exp, i);
37972 rtx op = expand_normal (arg);
37973 machine_mode mode = insn_p->operand[i + 1].mode;
37974 bool match = insn_p->operand[i + 1].predicate (op, mode);
37976 if (i == nargs - nargs_constant)
37978 if (!match)
37980 switch (icode)
37982 case CODE_FOR_avx512f_getmantv8df_mask_round:
37983 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37984 case CODE_FOR_avx512f_vgetmantv2df_round:
37985 case CODE_FOR_avx512f_vgetmantv4sf_round:
37986 error ("the immediate argument must be a 4-bit immediate");
37987 return const0_rtx;
37988 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37989 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37990 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37991 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37992 error ("the immediate argument must be a 5-bit immediate");
37993 return const0_rtx;
37994 default:
37995 error ("the immediate argument must be an 8-bit immediate");
37996 return const0_rtx;
38000 else if (i == nargs-1)
38002 if (!insn_p->operand[nargs].predicate (op, SImode))
38004 error ("incorrect rounding operand");
38005 return const0_rtx;
38008 /* If there is no rounding, use the normal version of the pattern. */
38009 if (INTVAL (op) == NO_ROUND)
38010 redundant_embed_rnd = 1;
38012 else
38014 if (VECTOR_MODE_P (mode))
38015 op = safe_vector_operand (op, mode);
38017 op = fixup_modeless_constant (op, mode);
38019 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38021 if (optimize || !match)
38022 op = copy_to_mode_reg (mode, op);
38024 else
38026 op = copy_to_reg (op);
38027 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38031 args[i].op = op;
38032 args[i].mode = mode;
38035 switch (nargs)
38037 case 1:
38038 pat = GEN_FCN (icode) (target, args[0].op);
38039 break;
38040 case 2:
38041 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38042 break;
38043 case 3:
38044 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38045 args[2].op);
38046 break;
38047 case 4:
38048 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38049 args[2].op, args[3].op);
38050 break;
38051 case 5:
38052 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38053 args[2].op, args[3].op, args[4].op);
      break;
38054 case 6:
38055 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38056 args[2].op, args[3].op, args[4].op,
38057 args[5].op);
38058 break;
38059 default:
38060 gcc_unreachable ();
38063 if (!pat)
38064 return 0;
38066 if (redundant_embed_rnd)
38067 pat = ix86_erase_embedded_rounding (pat);
38069 emit_insn (pat);
38070 return target;
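/* Usage sketch (illustrative, not part of the original source): an AVX-512
   builtin that takes an explicit rounding operand but is called with the
   no-rounding value NO_ROUND still expands through the *_round pattern
   here; the now-redundant UNSPEC_EMBEDDED_ROUNDING is then stripped by
   ix86_erase_embedded_rounding so the ordinary insn pattern can match.  */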
38073 /* Subroutine of ix86_expand_builtin to take care of special insns
38074 with a variable number of operands. */
38076 static rtx
38077 ix86_expand_special_args_builtin (const struct builtin_description *d,
38078 tree exp, rtx target)
38080 tree arg;
38081 rtx pat, op;
38082 unsigned int i, nargs, arg_adjust, memory;
38083 bool aligned_mem = false;
38084 struct
38086 rtx op;
38087 machine_mode mode;
38088 } args[3];
38089 enum insn_code icode = d->icode;
38090 bool last_arg_constant = false;
38091 const struct insn_data_d *insn_p = &insn_data[icode];
38092 machine_mode tmode = insn_p->operand[0].mode;
38093 enum { load, store } klass;
38095 switch ((enum ix86_builtin_func_type) d->flag)
38097 case VOID_FTYPE_VOID:
38098 emit_insn (GEN_FCN (icode) (target));
38099 return 0;
38100 case VOID_FTYPE_UINT64:
38101 case VOID_FTYPE_UNSIGNED:
38102 nargs = 0;
38103 klass = store;
38104 memory = 0;
38105 break;
38107 case INT_FTYPE_VOID:
38108 case USHORT_FTYPE_VOID:
38109 case UINT64_FTYPE_VOID:
38110 case UNSIGNED_FTYPE_VOID:
38111 nargs = 0;
38112 klass = load;
38113 memory = 0;
38114 break;
38115 case UINT64_FTYPE_PUNSIGNED:
38116 case V2DI_FTYPE_PV2DI:
38117 case V4DI_FTYPE_PV4DI:
38118 case V32QI_FTYPE_PCCHAR:
38119 case V16QI_FTYPE_PCCHAR:
38120 case V8SF_FTYPE_PCV4SF:
38121 case V8SF_FTYPE_PCFLOAT:
38122 case V4SF_FTYPE_PCFLOAT:
38123 case V4DF_FTYPE_PCV2DF:
38124 case V4DF_FTYPE_PCDOUBLE:
38125 case V2DF_FTYPE_PCDOUBLE:
38126 case VOID_FTYPE_PVOID:
38127 case V8DI_FTYPE_PV8DI:
38128 nargs = 1;
38129 klass = load;
38130 memory = 0;
38131 switch (icode)
38133 case CODE_FOR_sse4_1_movntdqa:
38134 case CODE_FOR_avx2_movntdqa:
38135 case CODE_FOR_avx512f_movntdqa:
38136 aligned_mem = true;
38137 break;
38138 default:
38139 break;
38141 break;
38142 case VOID_FTYPE_PV2SF_V4SF:
38143 case VOID_FTYPE_PV8DI_V8DI:
38144 case VOID_FTYPE_PV4DI_V4DI:
38145 case VOID_FTYPE_PV2DI_V2DI:
38146 case VOID_FTYPE_PCHAR_V32QI:
38147 case VOID_FTYPE_PCHAR_V16QI:
38148 case VOID_FTYPE_PFLOAT_V16SF:
38149 case VOID_FTYPE_PFLOAT_V8SF:
38150 case VOID_FTYPE_PFLOAT_V4SF:
38151 case VOID_FTYPE_PDOUBLE_V8DF:
38152 case VOID_FTYPE_PDOUBLE_V4DF:
38153 case VOID_FTYPE_PDOUBLE_V2DF:
38154 case VOID_FTYPE_PLONGLONG_LONGLONG:
38155 case VOID_FTYPE_PULONGLONG_ULONGLONG:
38156 case VOID_FTYPE_PINT_INT:
38157 nargs = 1;
38158 klass = store;
38159 /* Reserve memory operand for target. */
38160 memory = ARRAY_SIZE (args);
38161 switch (icode)
38163 /* These builtins and instructions require the memory
38164 to be properly aligned. */
38165 case CODE_FOR_avx_movntv4di:
38166 case CODE_FOR_sse2_movntv2di:
38167 case CODE_FOR_avx_movntv8sf:
38168 case CODE_FOR_sse_movntv4sf:
38169 case CODE_FOR_sse4a_vmmovntv4sf:
38170 case CODE_FOR_avx_movntv4df:
38171 case CODE_FOR_sse2_movntv2df:
38172 case CODE_FOR_sse4a_vmmovntv2df:
38173 case CODE_FOR_sse2_movntidi:
38174 case CODE_FOR_sse_movntq:
38175 case CODE_FOR_sse2_movntisi:
38176 case CODE_FOR_avx512f_movntv16sf:
38177 case CODE_FOR_avx512f_movntv8df:
38178 case CODE_FOR_avx512f_movntv8di:
38179 aligned_mem = true;
38180 break;
38181 default:
38182 break;
38184 break;
38185 case V4SF_FTYPE_V4SF_PCV2SF:
38186 case V2DF_FTYPE_V2DF_PCDOUBLE:
38187 nargs = 2;
38188 klass = load;
38189 memory = 1;
38190 break;
38191 case V8SF_FTYPE_PCV8SF_V8SI:
38192 case V4DF_FTYPE_PCV4DF_V4DI:
38193 case V4SF_FTYPE_PCV4SF_V4SI:
38194 case V2DF_FTYPE_PCV2DF_V2DI:
38195 case V8SI_FTYPE_PCV8SI_V8SI:
38196 case V4DI_FTYPE_PCV4DI_V4DI:
38197 case V4SI_FTYPE_PCV4SI_V4SI:
38198 case V2DI_FTYPE_PCV2DI_V2DI:
38199 nargs = 2;
38200 klass = load;
38201 memory = 0;
38202 break;
38203 case VOID_FTYPE_PV8DF_V8DF_UQI:
38204 case VOID_FTYPE_PV16SF_V16SF_UHI:
38205 case VOID_FTYPE_PV8DI_V8DI_UQI:
38206 case VOID_FTYPE_PV4DI_V4DI_UQI:
38207 case VOID_FTYPE_PV2DI_V2DI_UQI:
38208 case VOID_FTYPE_PV16SI_V16SI_UHI:
38209 case VOID_FTYPE_PV8SI_V8SI_UQI:
38210 case VOID_FTYPE_PV4SI_V4SI_UQI:
38211 switch (icode)
38213 /* These builtins and instructions require the memory
38214 to be properly aligned. */
38215 case CODE_FOR_avx512f_storev16sf_mask:
38216 case CODE_FOR_avx512f_storev16si_mask:
38217 case CODE_FOR_avx512f_storev8df_mask:
38218 case CODE_FOR_avx512f_storev8di_mask:
38219 case CODE_FOR_avx512vl_storev8sf_mask:
38220 case CODE_FOR_avx512vl_storev8si_mask:
38221 case CODE_FOR_avx512vl_storev4df_mask:
38222 case CODE_FOR_avx512vl_storev4di_mask:
38223 case CODE_FOR_avx512vl_storev4sf_mask:
38224 case CODE_FOR_avx512vl_storev4si_mask:
38225 case CODE_FOR_avx512vl_storev2df_mask:
38226 case CODE_FOR_avx512vl_storev2di_mask:
38227 aligned_mem = true;
38228 break;
38229 default:
38230 break;
38232 /* FALLTHRU */
38233 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38234 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38235 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38236 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38237 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38238 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38239 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38240 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38241 case VOID_FTYPE_PV8SI_V8DI_UQI:
38242 case VOID_FTYPE_PV8HI_V8DI_UQI:
38243 case VOID_FTYPE_PV16HI_V16SI_UHI:
38244 case VOID_FTYPE_PV16QI_V8DI_UQI:
38245 case VOID_FTYPE_PV16QI_V16SI_UHI:
38246 case VOID_FTYPE_PV4SI_V4DI_UQI:
38247 case VOID_FTYPE_PV4SI_V2DI_UQI:
38248 case VOID_FTYPE_PV8HI_V4DI_UQI:
38249 case VOID_FTYPE_PV8HI_V2DI_UQI:
38250 case VOID_FTYPE_PV8HI_V8SI_UQI:
38251 case VOID_FTYPE_PV8HI_V4SI_UQI:
38252 case VOID_FTYPE_PV16QI_V4DI_UQI:
38253 case VOID_FTYPE_PV16QI_V2DI_UQI:
38254 case VOID_FTYPE_PV16QI_V8SI_UQI:
38255 case VOID_FTYPE_PV16QI_V4SI_UQI:
38256 case VOID_FTYPE_PV8HI_V8HI_UQI:
38257 case VOID_FTYPE_PV16HI_V16HI_UHI:
38258 case VOID_FTYPE_PV32HI_V32HI_USI:
38259 case VOID_FTYPE_PV16QI_V16QI_UHI:
38260 case VOID_FTYPE_PV32QI_V32QI_USI:
38261 case VOID_FTYPE_PV64QI_V64QI_UDI:
38262 case VOID_FTYPE_PV4DF_V4DF_UQI:
38263 case VOID_FTYPE_PV2DF_V2DF_UQI:
38264 case VOID_FTYPE_PV8SF_V8SF_UQI:
38265 case VOID_FTYPE_PV4SF_V4SF_UQI:
38266 nargs = 2;
38267 klass = store;
38268 /* Reserve memory operand for target. */
38269 memory = ARRAY_SIZE (args);
38270 break;
38271 case V4SF_FTYPE_PCV4SF_V4SF_UQI:
38272 case V8SF_FTYPE_PCV8SF_V8SF_UQI:
38273 case V16SF_FTYPE_PCV16SF_V16SF_UHI:
38274 case V4SI_FTYPE_PCV4SI_V4SI_UQI:
38275 case V8SI_FTYPE_PCV8SI_V8SI_UQI:
38276 case V16SI_FTYPE_PCV16SI_V16SI_UHI:
38277 case V2DF_FTYPE_PCV2DF_V2DF_UQI:
38278 case V4DF_FTYPE_PCV4DF_V4DF_UQI:
38279 case V8DF_FTYPE_PCV8DF_V8DF_UQI:
38280 case V2DI_FTYPE_PCV2DI_V2DI_UQI:
38281 case V4DI_FTYPE_PCV4DI_V4DI_UQI:
38282 case V8DI_FTYPE_PCV8DI_V8DI_UQI:
38283 case V8HI_FTYPE_PCV8HI_V8HI_UQI:
38284 case V16HI_FTYPE_PCV16HI_V16HI_UHI:
38285 case V32HI_FTYPE_PCV32HI_V32HI_USI:
38286 case V16QI_FTYPE_PCV16QI_V16QI_UHI:
38287 case V32QI_FTYPE_PCV32QI_V32QI_USI:
38288 case V64QI_FTYPE_PCV64QI_V64QI_UDI:
38289 nargs = 3;
38290 klass = load;
38291 memory = 0;
38292 switch (icode)
38294 /* These builtins and instructions require the memory
38295 to be properly aligned. */
38296 case CODE_FOR_avx512f_loadv16sf_mask:
38297 case CODE_FOR_avx512f_loadv16si_mask:
38298 case CODE_FOR_avx512f_loadv8df_mask:
38299 case CODE_FOR_avx512f_loadv8di_mask:
38300 case CODE_FOR_avx512vl_loadv8sf_mask:
38301 case CODE_FOR_avx512vl_loadv8si_mask:
38302 case CODE_FOR_avx512vl_loadv4df_mask:
38303 case CODE_FOR_avx512vl_loadv4di_mask:
38304 case CODE_FOR_avx512vl_loadv4sf_mask:
38305 case CODE_FOR_avx512vl_loadv4si_mask:
38306 case CODE_FOR_avx512vl_loadv2df_mask:
38307 case CODE_FOR_avx512vl_loadv2di_mask:
38308 case CODE_FOR_avx512bw_loadv64qi_mask:
38309 case CODE_FOR_avx512vl_loadv32qi_mask:
38310 case CODE_FOR_avx512vl_loadv16qi_mask:
38311 case CODE_FOR_avx512bw_loadv32hi_mask:
38312 case CODE_FOR_avx512vl_loadv16hi_mask:
38313 case CODE_FOR_avx512vl_loadv8hi_mask:
38314 aligned_mem = true;
38315 break;
38316 default:
38317 break;
38319 break;
38320 case VOID_FTYPE_UINT_UINT_UINT:
38321 case VOID_FTYPE_UINT64_UINT_UINT:
38322 case UCHAR_FTYPE_UINT_UINT_UINT:
38323 case UCHAR_FTYPE_UINT64_UINT_UINT:
38324 nargs = 3;
38325 klass = load;
38326 memory = ARRAY_SIZE (args);
38327 last_arg_constant = true;
38328 break;
38329 default:
38330 gcc_unreachable ();
38333 gcc_assert (nargs <= ARRAY_SIZE (args));
38335 if (klass == store)
38337 arg = CALL_EXPR_ARG (exp, 0);
38338 op = expand_normal (arg);
38339 gcc_assert (target == 0);
38340 if (memory)
38342 op = ix86_zero_extend_to_Pmode (op);
38343 target = gen_rtx_MEM (tmode, op);
38344 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38345 on it. Try to improve it using get_pointer_alignment,
38346 and if the special builtin is one that requires strict
38347 mode alignment, also from its GET_MODE_ALIGNMENT.
38348 Failure to do so could lead to ix86_legitimate_combined_insn
38349 rejecting all changes to such insns. */
38350 unsigned int align = get_pointer_alignment (arg);
38351 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38352 align = GET_MODE_ALIGNMENT (tmode);
38353 if (MEM_ALIGN (target) < align)
38354 set_mem_align (target, align);
38356 else
38357 target = force_reg (tmode, op);
38358 arg_adjust = 1;
38360 else
38362 arg_adjust = 0;
38363 if (optimize
38364 || target == 0
38365 || !register_operand (target, tmode)
38366 || GET_MODE (target) != tmode)
38367 target = gen_reg_rtx (tmode);
38370 for (i = 0; i < nargs; i++)
38372 machine_mode mode = insn_p->operand[i + 1].mode;
38373 bool match;
38375 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38376 op = expand_normal (arg);
38377 match = insn_p->operand[i + 1].predicate (op, mode);
38379 if (last_arg_constant && (i + 1) == nargs)
38381 if (!match)
38383 if (icode == CODE_FOR_lwp_lwpvalsi3
38384 || icode == CODE_FOR_lwp_lwpinssi3
38385 || icode == CODE_FOR_lwp_lwpvaldi3
38386 || icode == CODE_FOR_lwp_lwpinsdi3)
38387 error ("the last argument must be a 32-bit immediate");
38388 else
38389 error ("the last argument must be an 8-bit immediate");
38390 return const0_rtx;
38393 else
38395 if (i == memory)
38397 /* This must be the memory operand. */
38398 op = ix86_zero_extend_to_Pmode (op);
38399 op = gen_rtx_MEM (mode, op);
38400 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38401 on it. Try to improve it using get_pointer_alignment,
38402 and if the special builtin is one that requires strict
38403 mode alignment, also from its GET_MODE_ALIGNMENT.
38404 Failure to do so could lead to ix86_legitimate_combined_insn
38405 rejecting all changes to such insns. */
38406 unsigned int align = get_pointer_alignment (arg);
38407 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38408 align = GET_MODE_ALIGNMENT (mode);
38409 if (MEM_ALIGN (op) < align)
38410 set_mem_align (op, align);
38412 else
38414 /* This must be a register. */
38415 if (VECTOR_MODE_P (mode))
38416 op = safe_vector_operand (op, mode);
38418 op = fixup_modeless_constant (op, mode);
38420 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38421 op = copy_to_mode_reg (mode, op);
38422 else
38424 op = copy_to_reg (op);
38425 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38430 args[i].op = op;
38431 args[i].mode = mode;
38434 switch (nargs)
38436 case 0:
38437 pat = GEN_FCN (icode) (target);
38438 break;
38439 case 1:
38440 pat = GEN_FCN (icode) (target, args[0].op);
38441 break;
38442 case 2:
38443 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38444 break;
38445 case 3:
38446 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38447 break;
38448 default:
38449 gcc_unreachable ();
38452 if (! pat)
38453 return 0;
38454 emit_insn (pat);
38455 return klass == store ? 0 : target;
38458 /* Return the integer constant in ARG. Constrain it to be in the range
38459 of the subparts of VEC_TYPE; issue an error if not. */
38461 static int
38462 get_element_number (tree vec_type, tree arg)
38464 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38466 if (!tree_fits_uhwi_p (arg)
38467 || (elt = tree_to_uhwi (arg), elt > max))
38469 error ("selector must be an integer constant in the range 0..%wi", max);
38470 return 0;
38473 return elt;
38476 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38477 ix86_expand_vector_init. We DO have language-level syntax for this, in
38478 the form of (type){ init-list }. Except that since we can't place emms
38479 instructions from inside the compiler, we can't allow the use of MMX
38480 registers unless the user explicitly asks for it. So we do *not* define
38481 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38482 we have builtins invoked by mmintrin.h that give us license to emit
38483 these sorts of instructions. */
38485 static rtx
38486 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38488 machine_mode tmode = TYPE_MODE (type);
38489 machine_mode inner_mode = GET_MODE_INNER (tmode);
38490 int i, n_elt = GET_MODE_NUNITS (tmode);
38491 rtvec v = rtvec_alloc (n_elt);
38493 gcc_assert (VECTOR_MODE_P (tmode));
38494 gcc_assert (call_expr_nargs (exp) == n_elt);
38496 for (i = 0; i < n_elt; ++i)
38498 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38499 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38502 if (!target || !register_operand (target, tmode))
38503 target = gen_reg_rtx (tmode);
38505 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38506 return target;
38509 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38510 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38511 had a language-level syntax for referencing vector elements. */
38513 static rtx
38514 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38516 machine_mode tmode, mode0;
38517 tree arg0, arg1;
38518 int elt;
38519 rtx op0;
38521 arg0 = CALL_EXPR_ARG (exp, 0);
38522 arg1 = CALL_EXPR_ARG (exp, 1);
38524 op0 = expand_normal (arg0);
38525 elt = get_element_number (TREE_TYPE (arg0), arg1);
38527 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38528 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38529 gcc_assert (VECTOR_MODE_P (mode0));
38531 op0 = force_reg (mode0, op0);
38533 if (optimize || !target || !register_operand (target, tmode))
38534 target = gen_reg_rtx (tmode);
38536 ix86_expand_vector_extract (true, target, op0, elt);
38538 return target;
38541 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38542 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38543 a language-level syntax for referencing vector elements. */
38545 static rtx
38546 ix86_expand_vec_set_builtin (tree exp)
38548 machine_mode tmode, mode1;
38549 tree arg0, arg1, arg2;
38550 int elt;
38551 rtx op0, op1, target;
38553 arg0 = CALL_EXPR_ARG (exp, 0);
38554 arg1 = CALL_EXPR_ARG (exp, 1);
38555 arg2 = CALL_EXPR_ARG (exp, 2);
38557 tmode = TYPE_MODE (TREE_TYPE (arg0));
38558 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38559 gcc_assert (VECTOR_MODE_P (tmode));
38561 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38562 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38563 elt = get_element_number (TREE_TYPE (arg0), arg2);
38565 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38566 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38568 op0 = force_reg (tmode, op0);
38569 op1 = force_reg (mode1, op1);
38571 /* OP0 is the source of these builtin functions and shouldn't be
38572 modified. Create a copy, use it and return it as target. */
38573 target = gen_reg_rtx (tmode);
38574 emit_move_insn (target, op0);
38575 ix86_expand_vector_set (true, target, op1, elt);
38577 return target;
38580 /* Emit conditional move of SRC to DST with condition
38581 OP1 CODE OP2. */
38582 static void
38583 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38585 rtx t;
38587 if (TARGET_CMOVE)
38589 t = ix86_expand_compare (code, op1, op2);
38590 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38591 src, dst)));
38593 else
38595 rtx_code_label *nomove = gen_label_rtx ();
38596 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38597 const0_rtx, GET_MODE (op1), 1, nomove);
38598 emit_move_insn (dst, src);
38599 emit_label (nomove);
38603 /* Choose the max of DST and SRC and put it in DST. */
38604 static void
38605 ix86_emit_move_max (rtx dst, rtx src)
38607 ix86_emit_cmove (dst, src, LTU, dst, src);
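/* I.e. DST = MAX (DST, SRC), treating both values as unsigned. */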
38610 /* Expand an expression EXP that calls a built-in function,
38611 with result going to TARGET if that's convenient
38612 (and in mode MODE if that's convenient).
38613 SUBTARGET may be used as the target for computing one of EXP's operands.
38614 IGNORE is nonzero if the value is to be ignored. */
38616 static rtx
38617 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38618 machine_mode mode, int ignore)
38620 const struct builtin_description *d;
38621 size_t i;
38622 enum insn_code icode;
38623 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38624 tree arg0, arg1, arg2, arg3, arg4;
38625 rtx op0, op1, op2, op3, op4, pat, insn;
38626 machine_mode mode0, mode1, mode2, mode3, mode4;
38627 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38629 /* For CPU builtins that can be folded, fold first and expand the fold. */
38630 switch (fcode)
38632 case IX86_BUILTIN_CPU_INIT:
38634 /* Make it call __cpu_indicator_init in libgcc. */
38635 tree call_expr, fndecl, type;
38636 type = build_function_type_list (integer_type_node, NULL_TREE);
38637 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38638 call_expr = build_call_expr (fndecl, 0);
38639 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38641 case IX86_BUILTIN_CPU_IS:
38642 case IX86_BUILTIN_CPU_SUPPORTS:
38644 tree arg0 = CALL_EXPR_ARG (exp, 0);
38645 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38646 gcc_assert (fold_expr != NULL_TREE);
38647 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38651 /* Determine whether the builtin function is available under the current ISA.
38652 Originally the builtin was not created if it wasn't applicable to the
38653 current ISA based on the command line switches. With function specific
38654 options, we need to check in the context of the function making the call
38655 whether it is supported. */
38656 if (ix86_builtins_isa[fcode].isa
38657 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38659 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38660 NULL, (enum fpmath_unit) 0, false);
38662 if (!opts)
38663 error ("%qE needs unknown isa option", fndecl);
38664 else
38666 gcc_assert (opts != NULL);
38667 error ("%qE needs isa option %s", fndecl, opts);
38668 free (opts);
38670 return const0_rtx;
38673 switch (fcode)
38675 case IX86_BUILTIN_BNDMK:
38676 if (!target
38677 || GET_MODE (target) != BNDmode
38678 || !register_operand (target, BNDmode))
38679 target = gen_reg_rtx (BNDmode);
38681 arg0 = CALL_EXPR_ARG (exp, 0);
38682 arg1 = CALL_EXPR_ARG (exp, 1);
38684 op0 = expand_normal (arg0);
38685 op1 = expand_normal (arg1);
38687 if (!register_operand (op0, Pmode))
38688 op0 = ix86_zero_extend_to_Pmode (op0);
38689 if (!register_operand (op1, Pmode))
38690 op1 = ix86_zero_extend_to_Pmode (op1);
38692 /* Builtin arg1 is the size of the block, but the instruction's op1
38693 should be (size - 1). */
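/* E.g. a size argument of 10 makes the instruction receive 9, giving
bounds that cover [op0, op0 + 9]. */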
38694 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38695 NULL_RTX, 1, OPTAB_DIRECT);
38697 emit_insn (BNDmode == BND64mode
38698 ? gen_bnd64_mk (target, op0, op1)
38699 : gen_bnd32_mk (target, op0, op1));
38700 return target;
38702 case IX86_BUILTIN_BNDSTX:
38703 arg0 = CALL_EXPR_ARG (exp, 0);
38704 arg1 = CALL_EXPR_ARG (exp, 1);
38705 arg2 = CALL_EXPR_ARG (exp, 2);
38707 op0 = expand_normal (arg0);
38708 op1 = expand_normal (arg1);
38709 op2 = expand_normal (arg2);
38711 if (!register_operand (op0, Pmode))
38712 op0 = ix86_zero_extend_to_Pmode (op0);
38713 if (!register_operand (op1, BNDmode))
38714 op1 = copy_to_mode_reg (BNDmode, op1);
38715 if (!register_operand (op2, Pmode))
38716 op2 = ix86_zero_extend_to_Pmode (op2);
38718 emit_insn (BNDmode == BND64mode
38719 ? gen_bnd64_stx (op2, op0, op1)
38720 : gen_bnd32_stx (op2, op0, op1));
38721 return 0;
38723 case IX86_BUILTIN_BNDLDX:
38724 if (!target
38725 || GET_MODE (target) != BNDmode
38726 || !register_operand (target, BNDmode))
38727 target = gen_reg_rtx (BNDmode);
38729 arg0 = CALL_EXPR_ARG (exp, 0);
38730 arg1 = CALL_EXPR_ARG (exp, 1);
38732 op0 = expand_normal (arg0);
38733 op1 = expand_normal (arg1);
38735 if (!register_operand (op0, Pmode))
38736 op0 = ix86_zero_extend_to_Pmode (op0);
38737 if (!register_operand (op1, Pmode))
38738 op1 = ix86_zero_extend_to_Pmode (op1);
38740 emit_insn (BNDmode == BND64mode
38741 ? gen_bnd64_ldx (target, op0, op1)
38742 : gen_bnd32_ldx (target, op0, op1));
38743 return target;
38745 case IX86_BUILTIN_BNDCL:
38746 arg0 = CALL_EXPR_ARG (exp, 0);
38747 arg1 = CALL_EXPR_ARG (exp, 1);
38749 op0 = expand_normal (arg0);
38750 op1 = expand_normal (arg1);
38752 if (!register_operand (op0, Pmode))
38753 op0 = ix86_zero_extend_to_Pmode (op0);
38754 if (!register_operand (op1, BNDmode))
38755 op1 = copy_to_mode_reg (BNDmode, op1);
38757 emit_insn (BNDmode == BND64mode
38758 ? gen_bnd64_cl (op1, op0)
38759 : gen_bnd32_cl (op1, op0));
38760 return 0;
38762 case IX86_BUILTIN_BNDCU:
38763 arg0 = CALL_EXPR_ARG (exp, 0);
38764 arg1 = CALL_EXPR_ARG (exp, 1);
38766 op0 = expand_normal (arg0);
38767 op1 = expand_normal (arg1);
38769 if (!register_operand (op0, Pmode))
38770 op0 = ix86_zero_extend_to_Pmode (op0);
38771 if (!register_operand (op1, BNDmode))
38772 op1 = copy_to_mode_reg (BNDmode, op1);
38774 emit_insn (BNDmode == BND64mode
38775 ? gen_bnd64_cu (op1, op0)
38776 : gen_bnd32_cu (op1, op0));
38777 return 0;
38779 case IX86_BUILTIN_BNDRET:
38780 arg0 = CALL_EXPR_ARG (exp, 0);
38781 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38782 target = chkp_get_rtl_bounds (arg0);
38784 /* If no bounds were specified for the returned value,
38785 then use INIT bounds. This usually happens when
38786 some built-in function is expanded. */
38787 if (!target)
38789 rtx t1 = gen_reg_rtx (Pmode);
38790 rtx t2 = gen_reg_rtx (Pmode);
38791 target = gen_reg_rtx (BNDmode);
38792 emit_move_insn (t1, const0_rtx);
38793 emit_move_insn (t2, constm1_rtx);
38794 emit_insn (BNDmode == BND64mode
38795 ? gen_bnd64_mk (target, t1, t2)
38796 : gen_bnd32_mk (target, t1, t2));
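/* With lb = 0 and (size - 1) = ~0 these INIT bounds cover the whole
address space, so checks against them never fail. */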
38799 gcc_assert (target && REG_P (target));
38800 return target;
38802 case IX86_BUILTIN_BNDNARROW:
38804 rtx m1, m1h1, m1h2, lb, ub, t1;
38806 /* Return value and lb. */
38807 arg0 = CALL_EXPR_ARG (exp, 0);
38808 /* Bounds. */
38809 arg1 = CALL_EXPR_ARG (exp, 1);
38810 /* Size. */
38811 arg2 = CALL_EXPR_ARG (exp, 2);
38813 lb = expand_normal (arg0);
38814 op1 = expand_normal (arg1);
38815 op2 = expand_normal (arg2);
38817 /* Size was passed, but we need to use (size - 1), as for bndmk. */
38818 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38819 NULL_RTX, 1, OPTAB_DIRECT);
38821 /* Add LB to the size and invert the result to get UB. */
38822 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38823 op2, 1, OPTAB_DIRECT);
38824 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
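/* UB is kept in one's complement form, which lets the narrowing below
use the same unsigned-max helper for both the lower and the (inverted)
upper bound. */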
38826 if (!register_operand (lb, Pmode))
38827 lb = ix86_zero_extend_to_Pmode (lb);
38828 if (!register_operand (ub, Pmode))
38829 ub = ix86_zero_extend_to_Pmode (ub);
38831 /* We need to move bounds to memory before any computations. */
38832 if (MEM_P (op1))
38833 m1 = op1;
38834 else
38836 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38837 emit_move_insn (m1, op1);
38840 /* Generate mem expression to be used for access to LB and UB. */
38841 m1h1 = adjust_address (m1, Pmode, 0);
38842 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38844 t1 = gen_reg_rtx (Pmode);
38846 /* Compute LB. */
38847 emit_move_insn (t1, m1h1);
38848 ix86_emit_move_max (t1, lb);
38849 emit_move_insn (m1h1, t1);
38851 /* Compute UB. UB is stored in 1's complement form. Therefore
38852 we also use max here. */
38853 emit_move_insn (t1, m1h2);
38854 ix86_emit_move_max (t1, ub);
38855 emit_move_insn (m1h2, t1);
38857 op2 = gen_reg_rtx (BNDmode);
38858 emit_move_insn (op2, m1);
38860 return chkp_join_splitted_slot (lb, op2);
38863 case IX86_BUILTIN_BNDINT:
38865 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38867 if (!target
38868 || GET_MODE (target) != BNDmode
38869 || !register_operand (target, BNDmode))
38870 target = gen_reg_rtx (BNDmode);
38872 arg0 = CALL_EXPR_ARG (exp, 0);
38873 arg1 = CALL_EXPR_ARG (exp, 1);
38875 op0 = expand_normal (arg0);
38876 op1 = expand_normal (arg1);
38878 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38879 rh1 = adjust_address (res, Pmode, 0);
38880 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38882 /* Put first bounds to temporaries. */
38883 lb1 = gen_reg_rtx (Pmode);
38884 ub1 = gen_reg_rtx (Pmode);
38885 if (MEM_P (op0))
38887 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38888 emit_move_insn (ub1, adjust_address (op0, Pmode,
38889 GET_MODE_SIZE (Pmode)));
38891 else
38893 emit_move_insn (res, op0);
38894 emit_move_insn (lb1, rh1);
38895 emit_move_insn (ub1, rh2);
38898 /* Put second bounds to temporaries. */
38899 lb2 = gen_reg_rtx (Pmode);
38900 ub2 = gen_reg_rtx (Pmode);
38901 if (MEM_P (op1))
38903 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38904 emit_move_insn (ub2, adjust_address (op1, Pmode,
38905 GET_MODE_SIZE (Pmode)));
38907 else
38909 emit_move_insn (res, op1);
38910 emit_move_insn (lb2, rh1);
38911 emit_move_insn (ub2, rh2);
38914 /* Compute LB. */
38915 ix86_emit_move_max (lb1, lb2);
38916 emit_move_insn (rh1, lb1);
38918 /* Compute UB. UB is stored in 1's complement form. Therefore
38919 we also use max here. */
38920 ix86_emit_move_max (ub1, ub2);
38921 emit_move_insn (rh2, ub1);
38923 emit_move_insn (target, res);
38925 return target;
38928 case IX86_BUILTIN_SIZEOF:
38930 tree name;
38931 rtx symbol;
38933 if (!target
38934 || GET_MODE (target) != Pmode
38935 || !register_operand (target, Pmode))
38936 target = gen_reg_rtx (Pmode);
38938 arg0 = CALL_EXPR_ARG (exp, 0);
38939 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38941 name = DECL_ASSEMBLER_NAME (arg0);
38942 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38944 emit_insn (Pmode == SImode
38945 ? gen_move_size_reloc_si (target, symbol)
38946 : gen_move_size_reloc_di (target, symbol));
38948 return target;
38951 case IX86_BUILTIN_BNDLOWER:
38953 rtx mem, hmem;
38955 if (!target
38956 || GET_MODE (target) != Pmode
38957 || !register_operand (target, Pmode))
38958 target = gen_reg_rtx (Pmode);
38960 arg0 = CALL_EXPR_ARG (exp, 0);
38961 op0 = expand_normal (arg0);
38963 /* We need to move bounds to memory first. */
38964 if (MEM_P (op0))
38965 mem = op0;
38966 else
38968 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38969 emit_move_insn (mem, op0);
38972 /* Generate mem expression to access LB and load it. */
38973 hmem = adjust_address (mem, Pmode, 0);
38974 emit_move_insn (target, hmem);
38976 return target;
38979 case IX86_BUILTIN_BNDUPPER:
38981 rtx mem, hmem, res;
38983 if (!target
38984 || GET_MODE (target) != Pmode
38985 || !register_operand (target, Pmode))
38986 target = gen_reg_rtx (Pmode);
38988 arg0 = CALL_EXPR_ARG (exp, 0);
38989 op0 = expand_normal (arg0);
38991 /* We need to move bounds to memory first. */
38992 if (MEM_P (op0))
38993 mem = op0;
38994 else
38996 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
38997 emit_move_insn (mem, op0);
39000 /* Generate mem expression to access UB. */
39001 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
39003 /* UB is stored in one's complement form, so invert all its bits
to recover the real upper bound. */
39004 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
39006 if (res != target)
39007 emit_move_insn (target, res);
39009 return target;
39012 case IX86_BUILTIN_MASKMOVQ:
39013 case IX86_BUILTIN_MASKMOVDQU:
39014 icode = (fcode == IX86_BUILTIN_MASKMOVQ
39015 ? CODE_FOR_mmx_maskmovq
39016 : CODE_FOR_sse2_maskmovdqu);
39017 /* Note the arg order is different from the operand order. */
39018 arg1 = CALL_EXPR_ARG (exp, 0);
39019 arg2 = CALL_EXPR_ARG (exp, 1);
39020 arg0 = CALL_EXPR_ARG (exp, 2);
39021 op0 = expand_normal (arg0);
39022 op1 = expand_normal (arg1);
39023 op2 = expand_normal (arg2);
39024 mode0 = insn_data[icode].operand[0].mode;
39025 mode1 = insn_data[icode].operand[1].mode;
39026 mode2 = insn_data[icode].operand[2].mode;
39028 op0 = ix86_zero_extend_to_Pmode (op0);
39029 op0 = gen_rtx_MEM (mode1, op0);
39031 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39032 op0 = copy_to_mode_reg (mode0, op0);
39033 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39034 op1 = copy_to_mode_reg (mode1, op1);
39035 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39036 op2 = copy_to_mode_reg (mode2, op2);
39037 pat = GEN_FCN (icode) (op0, op1, op2);
39038 if (! pat)
39039 return 0;
39040 emit_insn (pat);
39041 return 0;
39043 case IX86_BUILTIN_LDMXCSR:
39044 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
39045 target = assign_386_stack_local (SImode, SLOT_TEMP);
39046 emit_move_insn (target, op0);
39047 emit_insn (gen_sse_ldmxcsr (target));
39048 return 0;
39050 case IX86_BUILTIN_STMXCSR:
39051 target = assign_386_stack_local (SImode, SLOT_TEMP);
39052 emit_insn (gen_sse_stmxcsr (target));
39053 return copy_to_mode_reg (SImode, target);
39055 case IX86_BUILTIN_CLFLUSH:
39056 arg0 = CALL_EXPR_ARG (exp, 0);
39057 op0 = expand_normal (arg0);
39058 icode = CODE_FOR_sse2_clflush;
39059 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39060 op0 = ix86_zero_extend_to_Pmode (op0);
39062 emit_insn (gen_sse2_clflush (op0));
39063 return 0;
39065 case IX86_BUILTIN_CLWB:
39066 arg0 = CALL_EXPR_ARG (exp, 0);
39067 op0 = expand_normal (arg0);
39068 icode = CODE_FOR_clwb;
39069 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39070 op0 = ix86_zero_extend_to_Pmode (op0);
39072 emit_insn (gen_clwb (op0));
39073 return 0;
39075 case IX86_BUILTIN_CLFLUSHOPT:
39076 arg0 = CALL_EXPR_ARG (exp, 0);
39077 op0 = expand_normal (arg0);
39078 icode = CODE_FOR_clflushopt;
39079 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39080 op0 = ix86_zero_extend_to_Pmode (op0);
39082 emit_insn (gen_clflushopt (op0));
39083 return 0;
39085 case IX86_BUILTIN_MONITOR:
39086 case IX86_BUILTIN_MONITORX:
39087 arg0 = CALL_EXPR_ARG (exp, 0);
39088 arg1 = CALL_EXPR_ARG (exp, 1);
39089 arg2 = CALL_EXPR_ARG (exp, 2);
39090 op0 = expand_normal (arg0);
39091 op1 = expand_normal (arg1);
39092 op2 = expand_normal (arg2);
39093 if (!REG_P (op0))
39094 op0 = ix86_zero_extend_to_Pmode (op0);
39095 if (!REG_P (op1))
39096 op1 = copy_to_mode_reg (SImode, op1);
39097 if (!REG_P (op2))
39098 op2 = copy_to_mode_reg (SImode, op2);
39100 emit_insn (fcode == IX86_BUILTIN_MONITOR
39101 ? ix86_gen_monitor (op0, op1, op2)
39102 : ix86_gen_monitorx (op0, op1, op2));
39103 return 0;
39105 case IX86_BUILTIN_MWAIT:
39106 arg0 = CALL_EXPR_ARG (exp, 0);
39107 arg1 = CALL_EXPR_ARG (exp, 1);
39108 op0 = expand_normal (arg0);
39109 op1 = expand_normal (arg1);
39110 if (!REG_P (op0))
39111 op0 = copy_to_mode_reg (SImode, op0);
39112 if (!REG_P (op1))
39113 op1 = copy_to_mode_reg (SImode, op1);
39114 emit_insn (gen_sse3_mwait (op0, op1));
39115 return 0;
39117 case IX86_BUILTIN_MWAITX:
39118 arg0 = CALL_EXPR_ARG (exp, 0);
39119 arg1 = CALL_EXPR_ARG (exp, 1);
39120 arg2 = CALL_EXPR_ARG (exp, 2);
39121 op0 = expand_normal (arg0);
39122 op1 = expand_normal (arg1);
39123 op2 = expand_normal (arg2);
39124 if (!REG_P (op0))
39125 op0 = copy_to_mode_reg (SImode, op0);
39126 if (!REG_P (op1))
39127 op1 = copy_to_mode_reg (SImode, op1);
39128 if (!REG_P (op2))
39129 op2 = copy_to_mode_reg (SImode, op2);
39130 emit_insn (gen_mwaitx (op0, op1, op2));
39131 return 0;
39133 case IX86_BUILTIN_VEC_INIT_V2SI:
39134 case IX86_BUILTIN_VEC_INIT_V4HI:
39135 case IX86_BUILTIN_VEC_INIT_V8QI:
39136 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
39138 case IX86_BUILTIN_VEC_EXT_V2DF:
39139 case IX86_BUILTIN_VEC_EXT_V2DI:
39140 case IX86_BUILTIN_VEC_EXT_V4SF:
39141 case IX86_BUILTIN_VEC_EXT_V4SI:
39142 case IX86_BUILTIN_VEC_EXT_V8HI:
39143 case IX86_BUILTIN_VEC_EXT_V2SI:
39144 case IX86_BUILTIN_VEC_EXT_V4HI:
39145 case IX86_BUILTIN_VEC_EXT_V16QI:
39146 return ix86_expand_vec_ext_builtin (exp, target);
39148 case IX86_BUILTIN_VEC_SET_V2DI:
39149 case IX86_BUILTIN_VEC_SET_V4SF:
39150 case IX86_BUILTIN_VEC_SET_V4SI:
39151 case IX86_BUILTIN_VEC_SET_V8HI:
39152 case IX86_BUILTIN_VEC_SET_V4HI:
39153 case IX86_BUILTIN_VEC_SET_V16QI:
39154 return ix86_expand_vec_set_builtin (exp);
39156 case IX86_BUILTIN_INFQ:
39157 case IX86_BUILTIN_HUGE_VALQ:
39159 REAL_VALUE_TYPE inf;
39160 rtx tmp;
39162 real_inf (&inf);
39163 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39165 tmp = validize_mem (force_const_mem (mode, tmp));
39167 if (target == 0)
39168 target = gen_reg_rtx (mode);
39170 emit_move_insn (target, tmp);
39171 return target;
39174 case IX86_BUILTIN_RDPMC:
39175 case IX86_BUILTIN_RDTSC:
39176 case IX86_BUILTIN_RDTSCP:
39178 op0 = gen_reg_rtx (DImode);
39179 op1 = gen_reg_rtx (DImode);
39181 if (fcode == IX86_BUILTIN_RDPMC)
39183 arg0 = CALL_EXPR_ARG (exp, 0);
39184 op2 = expand_normal (arg0);
39185 if (!register_operand (op2, SImode))
39186 op2 = copy_to_mode_reg (SImode, op2);
39188 insn = (TARGET_64BIT
39189 ? gen_rdpmc_rex64 (op0, op1, op2)
39190 : gen_rdpmc (op0, op2));
39191 emit_insn (insn);
39193 else if (fcode == IX86_BUILTIN_RDTSC)
39195 insn = (TARGET_64BIT
39196 ? gen_rdtsc_rex64 (op0, op1)
39197 : gen_rdtsc (op0));
39198 emit_insn (insn);
39200 else
39202 op2 = gen_reg_rtx (SImode);
39204 insn = (TARGET_64BIT
39205 ? gen_rdtscp_rex64 (op0, op1, op2)
39206 : gen_rdtscp (op0, op2));
39207 emit_insn (insn);
39209 arg0 = CALL_EXPR_ARG (exp, 0);
39210 op4 = expand_normal (arg0);
39211 if (!address_operand (op4, VOIDmode))
39213 op4 = convert_memory_address (Pmode, op4);
39214 op4 = copy_addr_to_reg (op4);
39216 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39219 if (target == 0)
39221 /* mode is VOIDmode if __builtin_rd* has been called
39222 without lhs. */
39223 if (mode == VOIDmode)
39224 return target;
39225 target = gen_reg_rtx (mode);
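/* On 64-bit targets the patterns return the value in two 32-bit
halves; combine them into a single DImode value: (high << 32) | low. */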
39228 if (TARGET_64BIT)
39230 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39231 op1, 1, OPTAB_DIRECT);
39232 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39233 op0, 1, OPTAB_DIRECT);
39236 emit_move_insn (target, op0);
39237 return target;
39239 case IX86_BUILTIN_FXSAVE:
39240 case IX86_BUILTIN_FXRSTOR:
39241 case IX86_BUILTIN_FXSAVE64:
39242 case IX86_BUILTIN_FXRSTOR64:
39243 case IX86_BUILTIN_FNSTENV:
39244 case IX86_BUILTIN_FLDENV:
39245 mode0 = BLKmode;
39246 switch (fcode)
39248 case IX86_BUILTIN_FXSAVE:
39249 icode = CODE_FOR_fxsave;
39250 break;
39251 case IX86_BUILTIN_FXRSTOR:
39252 icode = CODE_FOR_fxrstor;
39253 break;
39254 case IX86_BUILTIN_FXSAVE64:
39255 icode = CODE_FOR_fxsave64;
39256 break;
39257 case IX86_BUILTIN_FXRSTOR64:
39258 icode = CODE_FOR_fxrstor64;
39259 break;
39260 case IX86_BUILTIN_FNSTENV:
39261 icode = CODE_FOR_fnstenv;
39262 break;
39263 case IX86_BUILTIN_FLDENV:
39264 icode = CODE_FOR_fldenv;
39265 break;
39266 default:
39267 gcc_unreachable ();
39270 arg0 = CALL_EXPR_ARG (exp, 0);
39271 op0 = expand_normal (arg0);
39273 if (!address_operand (op0, VOIDmode))
39275 op0 = convert_memory_address (Pmode, op0);
39276 op0 = copy_addr_to_reg (op0);
39278 op0 = gen_rtx_MEM (mode0, op0);
39280 pat = GEN_FCN (icode) (op0);
39281 if (pat)
39282 emit_insn (pat);
39283 return 0;
39285 case IX86_BUILTIN_XSAVE:
39286 case IX86_BUILTIN_XRSTOR:
39287 case IX86_BUILTIN_XSAVE64:
39288 case IX86_BUILTIN_XRSTOR64:
39289 case IX86_BUILTIN_XSAVEOPT:
39290 case IX86_BUILTIN_XSAVEOPT64:
39291 case IX86_BUILTIN_XSAVES:
39292 case IX86_BUILTIN_XRSTORS:
39293 case IX86_BUILTIN_XSAVES64:
39294 case IX86_BUILTIN_XRSTORS64:
39295 case IX86_BUILTIN_XSAVEC:
39296 case IX86_BUILTIN_XSAVEC64:
39297 arg0 = CALL_EXPR_ARG (exp, 0);
39298 arg1 = CALL_EXPR_ARG (exp, 1);
39299 op0 = expand_normal (arg0);
39300 op1 = expand_normal (arg1);
39302 if (!address_operand (op0, VOIDmode))
39304 op0 = convert_memory_address (Pmode, op0);
39305 op0 = copy_addr_to_reg (op0);
39307 op0 = gen_rtx_MEM (BLKmode, op0);
39309 op1 = force_reg (DImode, op1);
39311 if (TARGET_64BIT)
39313 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39314 NULL, 1, OPTAB_DIRECT);
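/* The xsave family takes its 64-bit feature mask in EDX:EAX. OP2 now
holds the high 32 bits; the gen_lowpart calls below pass the low and
high halves to the pattern. */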
39315 switch (fcode)
39317 case IX86_BUILTIN_XSAVE:
39318 icode = CODE_FOR_xsave_rex64;
39319 break;
39320 case IX86_BUILTIN_XRSTOR:
39321 icode = CODE_FOR_xrstor_rex64;
39322 break;
39323 case IX86_BUILTIN_XSAVE64:
39324 icode = CODE_FOR_xsave64;
39325 break;
39326 case IX86_BUILTIN_XRSTOR64:
39327 icode = CODE_FOR_xrstor64;
39328 break;
39329 case IX86_BUILTIN_XSAVEOPT:
39330 icode = CODE_FOR_xsaveopt_rex64;
39331 break;
39332 case IX86_BUILTIN_XSAVEOPT64:
39333 icode = CODE_FOR_xsaveopt64;
39334 break;
39335 case IX86_BUILTIN_XSAVES:
39336 icode = CODE_FOR_xsaves_rex64;
39337 break;
39338 case IX86_BUILTIN_XRSTORS:
39339 icode = CODE_FOR_xrstors_rex64;
39340 break;
39341 case IX86_BUILTIN_XSAVES64:
39342 icode = CODE_FOR_xsaves64;
39343 break;
39344 case IX86_BUILTIN_XRSTORS64:
39345 icode = CODE_FOR_xrstors64;
39346 break;
39347 case IX86_BUILTIN_XSAVEC:
39348 icode = CODE_FOR_xsavec_rex64;
39349 break;
39350 case IX86_BUILTIN_XSAVEC64:
39351 icode = CODE_FOR_xsavec64;
39352 break;
39353 default:
39354 gcc_unreachable ();
39357 op2 = gen_lowpart (SImode, op2);
39358 op1 = gen_lowpart (SImode, op1);
39359 pat = GEN_FCN (icode) (op0, op1, op2);
39361 else
39363 switch (fcode)
39365 case IX86_BUILTIN_XSAVE:
39366 icode = CODE_FOR_xsave;
39367 break;
39368 case IX86_BUILTIN_XRSTOR:
39369 icode = CODE_FOR_xrstor;
39370 break;
39371 case IX86_BUILTIN_XSAVEOPT:
39372 icode = CODE_FOR_xsaveopt;
39373 break;
39374 case IX86_BUILTIN_XSAVES:
39375 icode = CODE_FOR_xsaves;
39376 break;
39377 case IX86_BUILTIN_XRSTORS:
39378 icode = CODE_FOR_xrstors;
39379 break;
39380 case IX86_BUILTIN_XSAVEC:
39381 icode = CODE_FOR_xsavec;
39382 break;
39383 default:
39384 gcc_unreachable ();
39386 pat = GEN_FCN (icode) (op0, op1);
39389 if (pat)
39390 emit_insn (pat);
39391 return 0;
39393 case IX86_BUILTIN_LLWPCB:
39394 arg0 = CALL_EXPR_ARG (exp, 0);
39395 op0 = expand_normal (arg0);
39396 icode = CODE_FOR_lwp_llwpcb;
39397 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39398 op0 = ix86_zero_extend_to_Pmode (op0);
39399 emit_insn (gen_lwp_llwpcb (op0));
39400 return 0;
39402 case IX86_BUILTIN_SLWPCB:
39403 icode = CODE_FOR_lwp_slwpcb;
39404 if (!target
39405 || !insn_data[icode].operand[0].predicate (target, Pmode))
39406 target = gen_reg_rtx (Pmode);
39407 emit_insn (gen_lwp_slwpcb (target));
39408 return target;
39410 case IX86_BUILTIN_BEXTRI32:
39411 case IX86_BUILTIN_BEXTRI64:
39412 arg0 = CALL_EXPR_ARG (exp, 0);
39413 arg1 = CALL_EXPR_ARG (exp, 1);
39414 op0 = expand_normal (arg0);
39415 op1 = expand_normal (arg1);
39416 icode = (fcode == IX86_BUILTIN_BEXTRI32
39417 ? CODE_FOR_tbm_bextri_si
39418 : CODE_FOR_tbm_bextri_di);
39419 if (!CONST_INT_P (op1))
39421 error ("last argument must be an immediate");
39422 return const0_rtx;
39424 else
39426 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39427 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39428 op1 = GEN_INT (length);
39429 op2 = GEN_INT (lsb_index);
39430 pat = GEN_FCN (icode) (target, op0, op1, op2);
39431 if (pat)
39432 emit_insn (pat);
39433 return target;
39436 case IX86_BUILTIN_RDRAND16_STEP:
39437 icode = CODE_FOR_rdrandhi_1;
39438 mode0 = HImode;
39439 goto rdrand_step;
39441 case IX86_BUILTIN_RDRAND32_STEP:
39442 icode = CODE_FOR_rdrandsi_1;
39443 mode0 = SImode;
39444 goto rdrand_step;
39446 case IX86_BUILTIN_RDRAND64_STEP:
39447 icode = CODE_FOR_rdranddi_1;
39448 mode0 = DImode;
39450 rdrand_step:
39451 op0 = gen_reg_rtx (mode0);
39452 emit_insn (GEN_FCN (icode) (op0));
39454 arg0 = CALL_EXPR_ARG (exp, 0);
39455 op1 = expand_normal (arg0);
39456 if (!address_operand (op1, VOIDmode))
39458 op1 = convert_memory_address (Pmode, op1);
39459 op1 = copy_addr_to_reg (op1);
39461 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39463 op1 = gen_reg_rtx (SImode);
39464 emit_move_insn (op1, CONST1_RTX (SImode));
39466 /* Emit SImode conditional move. */
39467 if (mode0 == HImode)
39469 op2 = gen_reg_rtx (SImode);
39470 emit_insn (gen_zero_extendhisi2 (op2, op0));
39472 else if (mode0 == SImode)
39473 op2 = op0;
39474 else
39475 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39477 if (target == 0
39478 || !register_operand (target, SImode))
39479 target = gen_reg_rtx (SImode);
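/* The rdrand pattern reports success in the carry flag. The conditional
move below turns that into the builtin's return value: 1 when a random
value was delivered, otherwise the (zero-extended) destination value. */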
39481 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39482 const0_rtx);
39483 emit_insn (gen_rtx_SET (target,
39484 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39485 return target;
39487 case IX86_BUILTIN_RDSEED16_STEP:
39488 icode = CODE_FOR_rdseedhi_1;
39489 mode0 = HImode;
39490 goto rdseed_step;
39492 case IX86_BUILTIN_RDSEED32_STEP:
39493 icode = CODE_FOR_rdseedsi_1;
39494 mode0 = SImode;
39495 goto rdseed_step;
39497 case IX86_BUILTIN_RDSEED64_STEP:
39498 icode = CODE_FOR_rdseeddi_1;
39499 mode0 = DImode;
39501 rdseed_step:
39502 op0 = gen_reg_rtx (mode0);
39503 emit_insn (GEN_FCN (icode) (op0));
39505 arg0 = CALL_EXPR_ARG (exp, 0);
39506 op1 = expand_normal (arg0);
39507 if (!address_operand (op1, VOIDmode))
39509 op1 = convert_memory_address (Pmode, op1);
39510 op1 = copy_addr_to_reg (op1);
39512 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
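/* rdseed likewise signals success in the carry flag; materialize CF as
a QImode 0/1 value and zero-extend it into the return value. */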
39514 op2 = gen_reg_rtx (QImode);
39516 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39517 const0_rtx);
39518 emit_insn (gen_rtx_SET (op2, pat));
39520 if (target == 0
39521 || !register_operand (target, SImode))
39522 target = gen_reg_rtx (SImode);
39524 emit_insn (gen_zero_extendqisi2 (target, op2));
39525 return target;
39527 case IX86_BUILTIN_SBB32:
39528 icode = CODE_FOR_subborrowsi;
39529 mode0 = SImode;
39530 goto handlecarry;
39532 case IX86_BUILTIN_SBB64:
39533 icode = CODE_FOR_subborrowdi;
39534 mode0 = DImode;
39535 goto handlecarry;
39537 case IX86_BUILTIN_ADDCARRYX32:
39538 icode = CODE_FOR_addcarrysi;
39539 mode0 = SImode;
39540 goto handlecarry;
39542 case IX86_BUILTIN_ADDCARRYX64:
39543 icode = CODE_FOR_addcarrydi;
39544 mode0 = DImode;
39546 handlecarry:
39547 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39548 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39549 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39550 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39552 op1 = expand_normal (arg0);
39553 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39555 op2 = expand_normal (arg1);
39556 if (!register_operand (op2, mode0))
39557 op2 = copy_to_mode_reg (mode0, op2);
39559 op3 = expand_normal (arg2);
39560 if (!register_operand (op3, mode0))
39561 op3 = copy_to_mode_reg (mode0, op3);
39563 op4 = expand_normal (arg3);
39564 if (!address_operand (op4, VOIDmode))
39566 op4 = convert_memory_address (Pmode, op4);
39567 op4 = copy_addr_to_reg (op4);
39570 /* Generate CF from input operand. */
39571 emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
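/* Adding 0xff to the QImode carry-in sets the hardware carry flag
exactly when the carry-in is nonzero; the LTU rtx built below then
feeds that CF into the addcarry/subborrow pattern. */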
39573 /* Generate instruction that consumes CF. */
39574 op0 = gen_reg_rtx (mode0);
39576 op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
39577 pat = gen_rtx_LTU (mode0, op1, const0_rtx);
39578 emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
39580 /* Return current CF value. */
39581 if (target == 0)
39582 target = gen_reg_rtx (QImode);
39584 PUT_MODE (pat, QImode);
39585 emit_insn (gen_rtx_SET (target, pat));
39587 /* Store the result. */
39588 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39590 return target;
39592 case IX86_BUILTIN_READ_FLAGS:
39593 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39595 if (optimize
39596 || target == NULL_RTX
39597 || !nonimmediate_operand (target, word_mode)
39598 || GET_MODE (target) != word_mode)
39599 target = gen_reg_rtx (word_mode);
39601 emit_insn (gen_pop (target));
39602 return target;
39604 case IX86_BUILTIN_WRITE_FLAGS:
39606 arg0 = CALL_EXPR_ARG (exp, 0);
39607 op0 = expand_normal (arg0);
39608 if (!general_no_elim_operand (op0, word_mode))
39609 op0 = copy_to_mode_reg (word_mode, op0);
39611 emit_insn (gen_push (op0));
39612 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39613 return 0;
39615 case IX86_BUILTIN_KORTESTC16:
39616 icode = CODE_FOR_kortestchi;
39617 mode0 = HImode;
39618 mode1 = CCCmode;
39619 goto kortest;
39621 case IX86_BUILTIN_KORTESTZ16:
39622 icode = CODE_FOR_kortestzhi;
39623 mode0 = HImode;
39624 mode1 = CCZmode;
39626 kortest:
39627 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39628 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39629 op0 = expand_normal (arg0);
39630 op1 = expand_normal (arg1);
39632 op0 = copy_to_reg (op0);
39633 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39634 op1 = copy_to_reg (op1);
39635 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39637 target = gen_reg_rtx (QImode);
39638 emit_insn (gen_rtx_SET (target, const0_rtx));
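/* KORTEST sets the flags from the OR of the two mask operands; the
setcc below extracts the flag of interest (CF for kortestc, ZF for
kortestz) as the 0/1 result. */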
39640 /* Emit kortest. */
39641 emit_insn (GEN_FCN (icode) (op0, op1));
39642 /* And use setcc to return result from flags. */
39643 ix86_expand_setcc (target, EQ,
39644 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39645 return target;
39647 case IX86_BUILTIN_GATHERSIV2DF:
39648 icode = CODE_FOR_avx2_gathersiv2df;
39649 goto gather_gen;
39650 case IX86_BUILTIN_GATHERSIV4DF:
39651 icode = CODE_FOR_avx2_gathersiv4df;
39652 goto gather_gen;
39653 case IX86_BUILTIN_GATHERDIV2DF:
39654 icode = CODE_FOR_avx2_gatherdiv2df;
39655 goto gather_gen;
39656 case IX86_BUILTIN_GATHERDIV4DF:
39657 icode = CODE_FOR_avx2_gatherdiv4df;
39658 goto gather_gen;
39659 case IX86_BUILTIN_GATHERSIV4SF:
39660 icode = CODE_FOR_avx2_gathersiv4sf;
39661 goto gather_gen;
39662 case IX86_BUILTIN_GATHERSIV8SF:
39663 icode = CODE_FOR_avx2_gathersiv8sf;
39664 goto gather_gen;
39665 case IX86_BUILTIN_GATHERDIV4SF:
39666 icode = CODE_FOR_avx2_gatherdiv4sf;
39667 goto gather_gen;
39668 case IX86_BUILTIN_GATHERDIV8SF:
39669 icode = CODE_FOR_avx2_gatherdiv8sf;
39670 goto gather_gen;
39671 case IX86_BUILTIN_GATHERSIV2DI:
39672 icode = CODE_FOR_avx2_gathersiv2di;
39673 goto gather_gen;
39674 case IX86_BUILTIN_GATHERSIV4DI:
39675 icode = CODE_FOR_avx2_gathersiv4di;
39676 goto gather_gen;
39677 case IX86_BUILTIN_GATHERDIV2DI:
39678 icode = CODE_FOR_avx2_gatherdiv2di;
39679 goto gather_gen;
39680 case IX86_BUILTIN_GATHERDIV4DI:
39681 icode = CODE_FOR_avx2_gatherdiv4di;
39682 goto gather_gen;
39683 case IX86_BUILTIN_GATHERSIV4SI:
39684 icode = CODE_FOR_avx2_gathersiv4si;
39685 goto gather_gen;
39686 case IX86_BUILTIN_GATHERSIV8SI:
39687 icode = CODE_FOR_avx2_gathersiv8si;
39688 goto gather_gen;
39689 case IX86_BUILTIN_GATHERDIV4SI:
39690 icode = CODE_FOR_avx2_gatherdiv4si;
39691 goto gather_gen;
39692 case IX86_BUILTIN_GATHERDIV8SI:
39693 icode = CODE_FOR_avx2_gatherdiv8si;
39694 goto gather_gen;
39695 case IX86_BUILTIN_GATHERALTSIV4DF:
39696 icode = CODE_FOR_avx2_gathersiv4df;
39697 goto gather_gen;
39698 case IX86_BUILTIN_GATHERALTDIV8SF:
39699 icode = CODE_FOR_avx2_gatherdiv8sf;
39700 goto gather_gen;
39701 case IX86_BUILTIN_GATHERALTSIV4DI:
39702 icode = CODE_FOR_avx2_gathersiv4di;
39703 goto gather_gen;
39704 case IX86_BUILTIN_GATHERALTDIV8SI:
39705 icode = CODE_FOR_avx2_gatherdiv8si;
39706 goto gather_gen;
39707 case IX86_BUILTIN_GATHER3SIV16SF:
39708 icode = CODE_FOR_avx512f_gathersiv16sf;
39709 goto gather_gen;
39710 case IX86_BUILTIN_GATHER3SIV8DF:
39711 icode = CODE_FOR_avx512f_gathersiv8df;
39712 goto gather_gen;
39713 case IX86_BUILTIN_GATHER3DIV16SF:
39714 icode = CODE_FOR_avx512f_gatherdiv16sf;
39715 goto gather_gen;
39716 case IX86_BUILTIN_GATHER3DIV8DF:
39717 icode = CODE_FOR_avx512f_gatherdiv8df;
39718 goto gather_gen;
39719 case IX86_BUILTIN_GATHER3SIV16SI:
39720 icode = CODE_FOR_avx512f_gathersiv16si;
39721 goto gather_gen;
39722 case IX86_BUILTIN_GATHER3SIV8DI:
39723 icode = CODE_FOR_avx512f_gathersiv8di;
39724 goto gather_gen;
39725 case IX86_BUILTIN_GATHER3DIV16SI:
39726 icode = CODE_FOR_avx512f_gatherdiv16si;
39727 goto gather_gen;
39728 case IX86_BUILTIN_GATHER3DIV8DI:
39729 icode = CODE_FOR_avx512f_gatherdiv8di;
39730 goto gather_gen;
39731 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39732 icode = CODE_FOR_avx512f_gathersiv8df;
39733 goto gather_gen;
39734 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39735 icode = CODE_FOR_avx512f_gatherdiv16sf;
39736 goto gather_gen;
39737 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39738 icode = CODE_FOR_avx512f_gathersiv8di;
39739 goto gather_gen;
39740 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39741 icode = CODE_FOR_avx512f_gatherdiv16si;
39742 goto gather_gen;
39743 case IX86_BUILTIN_GATHER3SIV2DF:
39744 icode = CODE_FOR_avx512vl_gathersiv2df;
39745 goto gather_gen;
39746 case IX86_BUILTIN_GATHER3SIV4DF:
39747 icode = CODE_FOR_avx512vl_gathersiv4df;
39748 goto gather_gen;
39749 case IX86_BUILTIN_GATHER3DIV2DF:
39750 icode = CODE_FOR_avx512vl_gatherdiv2df;
39751 goto gather_gen;
39752 case IX86_BUILTIN_GATHER3DIV4DF:
39753 icode = CODE_FOR_avx512vl_gatherdiv4df;
39754 goto gather_gen;
39755 case IX86_BUILTIN_GATHER3SIV4SF:
39756 icode = CODE_FOR_avx512vl_gathersiv4sf;
39757 goto gather_gen;
39758 case IX86_BUILTIN_GATHER3SIV8SF:
39759 icode = CODE_FOR_avx512vl_gathersiv8sf;
39760 goto gather_gen;
39761 case IX86_BUILTIN_GATHER3DIV4SF:
39762 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39763 goto gather_gen;
39764 case IX86_BUILTIN_GATHER3DIV8SF:
39765 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39766 goto gather_gen;
39767 case IX86_BUILTIN_GATHER3SIV2DI:
39768 icode = CODE_FOR_avx512vl_gathersiv2di;
39769 goto gather_gen;
39770 case IX86_BUILTIN_GATHER3SIV4DI:
39771 icode = CODE_FOR_avx512vl_gathersiv4di;
39772 goto gather_gen;
39773 case IX86_BUILTIN_GATHER3DIV2DI:
39774 icode = CODE_FOR_avx512vl_gatherdiv2di;
39775 goto gather_gen;
39776 case IX86_BUILTIN_GATHER3DIV4DI:
39777 icode = CODE_FOR_avx512vl_gatherdiv4di;
39778 goto gather_gen;
39779 case IX86_BUILTIN_GATHER3SIV4SI:
39780 icode = CODE_FOR_avx512vl_gathersiv4si;
39781 goto gather_gen;
39782 case IX86_BUILTIN_GATHER3SIV8SI:
39783 icode = CODE_FOR_avx512vl_gathersiv8si;
39784 goto gather_gen;
39785 case IX86_BUILTIN_GATHER3DIV4SI:
39786 icode = CODE_FOR_avx512vl_gatherdiv4si;
39787 goto gather_gen;
39788 case IX86_BUILTIN_GATHER3DIV8SI:
39789 icode = CODE_FOR_avx512vl_gatherdiv8si;
39790 goto gather_gen;
39791 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39792 icode = CODE_FOR_avx512vl_gathersiv4df;
39793 goto gather_gen;
39794 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39795 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39796 goto gather_gen;
39797 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39798 icode = CODE_FOR_avx512vl_gathersiv4di;
39799 goto gather_gen;
39800 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39801 icode = CODE_FOR_avx512vl_gatherdiv8si;
39802 goto gather_gen;
39803 case IX86_BUILTIN_SCATTERSIV16SF:
39804 icode = CODE_FOR_avx512f_scattersiv16sf;
39805 goto scatter_gen;
39806 case IX86_BUILTIN_SCATTERSIV8DF:
39807 icode = CODE_FOR_avx512f_scattersiv8df;
39808 goto scatter_gen;
39809 case IX86_BUILTIN_SCATTERDIV16SF:
39810 icode = CODE_FOR_avx512f_scatterdiv16sf;
39811 goto scatter_gen;
39812 case IX86_BUILTIN_SCATTERDIV8DF:
39813 icode = CODE_FOR_avx512f_scatterdiv8df;
39814 goto scatter_gen;
39815 case IX86_BUILTIN_SCATTERSIV16SI:
39816 icode = CODE_FOR_avx512f_scattersiv16si;
39817 goto scatter_gen;
39818 case IX86_BUILTIN_SCATTERSIV8DI:
39819 icode = CODE_FOR_avx512f_scattersiv8di;
39820 goto scatter_gen;
39821 case IX86_BUILTIN_SCATTERDIV16SI:
39822 icode = CODE_FOR_avx512f_scatterdiv16si;
39823 goto scatter_gen;
39824 case IX86_BUILTIN_SCATTERDIV8DI:
39825 icode = CODE_FOR_avx512f_scatterdiv8di;
39826 goto scatter_gen;
39827 case IX86_BUILTIN_SCATTERSIV8SF:
39828 icode = CODE_FOR_avx512vl_scattersiv8sf;
39829 goto scatter_gen;
39830 case IX86_BUILTIN_SCATTERSIV4SF:
39831 icode = CODE_FOR_avx512vl_scattersiv4sf;
39832 goto scatter_gen;
39833 case IX86_BUILTIN_SCATTERSIV4DF:
39834 icode = CODE_FOR_avx512vl_scattersiv4df;
39835 goto scatter_gen;
39836 case IX86_BUILTIN_SCATTERSIV2DF:
39837 icode = CODE_FOR_avx512vl_scattersiv2df;
39838 goto scatter_gen;
39839 case IX86_BUILTIN_SCATTERDIV8SF:
39840 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39841 goto scatter_gen;
39842 case IX86_BUILTIN_SCATTERDIV4SF:
39843 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39844 goto scatter_gen;
39845 case IX86_BUILTIN_SCATTERDIV4DF:
39846 icode = CODE_FOR_avx512vl_scatterdiv4df;
39847 goto scatter_gen;
39848 case IX86_BUILTIN_SCATTERDIV2DF:
39849 icode = CODE_FOR_avx512vl_scatterdiv2df;
39850 goto scatter_gen;
39851 case IX86_BUILTIN_SCATTERSIV8SI:
39852 icode = CODE_FOR_avx512vl_scattersiv8si;
39853 goto scatter_gen;
39854 case IX86_BUILTIN_SCATTERSIV4SI:
39855 icode = CODE_FOR_avx512vl_scattersiv4si;
39856 goto scatter_gen;
39857 case IX86_BUILTIN_SCATTERSIV4DI:
39858 icode = CODE_FOR_avx512vl_scattersiv4di;
39859 goto scatter_gen;
39860 case IX86_BUILTIN_SCATTERSIV2DI:
39861 icode = CODE_FOR_avx512vl_scattersiv2di;
39862 goto scatter_gen;
39863 case IX86_BUILTIN_SCATTERDIV8SI:
39864 icode = CODE_FOR_avx512vl_scatterdiv8si;
39865 goto scatter_gen;
39866 case IX86_BUILTIN_SCATTERDIV4SI:
39867 icode = CODE_FOR_avx512vl_scatterdiv4si;
39868 goto scatter_gen;
39869 case IX86_BUILTIN_SCATTERDIV4DI:
39870 icode = CODE_FOR_avx512vl_scatterdiv4di;
39871 goto scatter_gen;
39872 case IX86_BUILTIN_SCATTERDIV2DI:
39873 icode = CODE_FOR_avx512vl_scatterdiv2di;
39874 goto scatter_gen;
39875 case IX86_BUILTIN_GATHERPFDPD:
39876 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39877 goto vec_prefetch_gen;
39878 case IX86_BUILTIN_GATHERPFDPS:
39879 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39880 goto vec_prefetch_gen;
39881 case IX86_BUILTIN_GATHERPFQPD:
39882 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39883 goto vec_prefetch_gen;
39884 case IX86_BUILTIN_GATHERPFQPS:
39885 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39886 goto vec_prefetch_gen;
39887 case IX86_BUILTIN_SCATTERPFDPD:
39888 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39889 goto vec_prefetch_gen;
39890 case IX86_BUILTIN_SCATTERPFDPS:
39891 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39892 goto vec_prefetch_gen;
39893 case IX86_BUILTIN_SCATTERPFQPD:
39894 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39895 goto vec_prefetch_gen;
39896 case IX86_BUILTIN_SCATTERPFQPS:
39897 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39898 goto vec_prefetch_gen;
39900 gather_gen:
39901 rtx half;
39902 rtx (*gen) (rtx, rtx);
39904 arg0 = CALL_EXPR_ARG (exp, 0);
39905 arg1 = CALL_EXPR_ARG (exp, 1);
39906 arg2 = CALL_EXPR_ARG (exp, 2);
39907 arg3 = CALL_EXPR_ARG (exp, 3);
39908 arg4 = CALL_EXPR_ARG (exp, 4);
39909 op0 = expand_normal (arg0);
39910 op1 = expand_normal (arg1);
39911 op2 = expand_normal (arg2);
39912 op3 = expand_normal (arg3);
39913 op4 = expand_normal (arg4);
39914 /* Note the arg order is different from the operand order. */
39915 mode0 = insn_data[icode].operand[1].mode;
39916 mode2 = insn_data[icode].operand[3].mode;
39917 mode3 = insn_data[icode].operand[4].mode;
39918 mode4 = insn_data[icode].operand[5].mode;
39920 if (target == NULL_RTX
39921 || GET_MODE (target) != insn_data[icode].operand[0].mode
39922 || !insn_data[icode].operand[0].predicate (target,
39923 GET_MODE (target)))
39924 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39925 else
39926 subtarget = target;
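/* The *ALT* gather builtins take an index (SIV forms) or source/mask
(DIV forms) vector that is wider than what the underlying pattern
expects, so only its low half is extracted and used below. */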
39928 switch (fcode)
39930 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39931 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39932 half = gen_reg_rtx (V8SImode);
39933 if (!nonimmediate_operand (op2, V16SImode))
39934 op2 = copy_to_mode_reg (V16SImode, op2);
39935 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39936 op2 = half;
39937 break;
39938 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39939 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39940 case IX86_BUILTIN_GATHERALTSIV4DF:
39941 case IX86_BUILTIN_GATHERALTSIV4DI:
39942 half = gen_reg_rtx (V4SImode);
39943 if (!nonimmediate_operand (op2, V8SImode))
39944 op2 = copy_to_mode_reg (V8SImode, op2);
39945 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39946 op2 = half;
39947 break;
39948 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39949 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39950 half = gen_reg_rtx (mode0);
39951 if (mode0 == V8SFmode)
39952 gen = gen_vec_extract_lo_v16sf;
39953 else
39954 gen = gen_vec_extract_lo_v16si;
39955 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39956 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39957 emit_insn (gen (half, op0));
39958 op0 = half;
39959 if (GET_MODE (op3) != VOIDmode)
39961 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39962 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39963 emit_insn (gen (half, op3));
39964 op3 = half;
39966 break;
39967 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39968 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39969 case IX86_BUILTIN_GATHERALTDIV8SF:
39970 case IX86_BUILTIN_GATHERALTDIV8SI:
39971 half = gen_reg_rtx (mode0);
39972 if (mode0 == V4SFmode)
39973 gen = gen_vec_extract_lo_v8sf;
39974 else
39975 gen = gen_vec_extract_lo_v8si;
39976 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39977 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39978 emit_insn (gen (half, op0));
39979 op0 = half;
39980 if (GET_MODE (op3) != VOIDmode)
39982 if (!nonimmediate_operand (op3, GET_MODE (op3)))
39983 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
39984 emit_insn (gen (half, op3));
39985 op3 = half;
39987 break;
39988 default:
39989 break;
39992 /* Force the memory operand to use only a base register here. We
39993 don't want to do this to the memory operand of other builtin
39994 functions. */
39995 op1 = ix86_zero_extend_to_Pmode (op1);
39997 if (!insn_data[icode].operand[1].predicate (op0, mode0))
39998 op0 = copy_to_mode_reg (mode0, op0);
39999 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
40000 op1 = copy_to_mode_reg (Pmode, op1);
40001 if (!insn_data[icode].operand[3].predicate (op2, mode2))
40002 op2 = copy_to_mode_reg (mode2, op2);
40004 op3 = fixup_modeless_constant (op3, mode3);
40006 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
40008 if (!insn_data[icode].operand[4].predicate (op3, mode3))
40009 op3 = copy_to_mode_reg (mode3, op3);
40011 else
40013 op3 = copy_to_reg (op3);
40014 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
40016 if (!insn_data[icode].operand[5].predicate (op4, mode4))
40018 error ("the last argument must be scale 1, 2, 4, 8");
40019 return const0_rtx;
40022 /* Optimize. If mask is known to have all high bits set,
40023 replace op0 with pc_rtx to signal that the instruction
40024 overwrites the whole destination and doesn't use its
40025 previous contents. */
40026 if (optimize)
40028 if (TREE_CODE (arg3) == INTEGER_CST)
40030 if (integer_all_onesp (arg3))
40031 op0 = pc_rtx;
40033 else if (TREE_CODE (arg3) == VECTOR_CST)
40035 unsigned int negative = 0;
40036 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
40038 tree cst = VECTOR_CST_ELT (arg3, i);
40039 if (TREE_CODE (cst) == INTEGER_CST
40040 && tree_int_cst_sign_bit (cst))
40041 negative++;
40042 else if (TREE_CODE (cst) == REAL_CST
40043 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
40044 negative++;
40046 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
40047 op0 = pc_rtx;
40049 else if (TREE_CODE (arg3) == SSA_NAME
40050 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
40052 /* Recognize also when mask is like:
40053 __v2df src = _mm_setzero_pd ();
40054 __v2df mask = _mm_cmpeq_pd (src, src);
40056 __v8sf src = _mm256_setzero_ps ();
40057 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
40058 as that is a cheaper way to load all ones into
40059 a register than having to load a constant from
40060 memory. */
40061 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
40062 if (is_gimple_call (def_stmt))
40064 tree fndecl = gimple_call_fndecl (def_stmt);
40065 if (fndecl
40066 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
40067 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
40069 case IX86_BUILTIN_CMPPD:
40070 case IX86_BUILTIN_CMPPS:
40071 case IX86_BUILTIN_CMPPD256:
40072 case IX86_BUILTIN_CMPPS256:
40073 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
40074 break;
40075 /* FALLTHRU */
40076 case IX86_BUILTIN_CMPEQPD:
40077 case IX86_BUILTIN_CMPEQPS:
40078 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
40079 && initializer_zerop (gimple_call_arg (def_stmt,
40080 1)))
40081 op0 = pc_rtx;
40082 break;
40083 default:
40084 break;
40090 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
40091 if (! pat)
40092 return const0_rtx;
40093 emit_insn (pat);
40095 switch (fcode)
40097 case IX86_BUILTIN_GATHER3DIV16SF:
40098 if (target == NULL_RTX)
40099 target = gen_reg_rtx (V8SFmode);
40100 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
40101 break;
40102 case IX86_BUILTIN_GATHER3DIV16SI:
40103 if (target == NULL_RTX)
40104 target = gen_reg_rtx (V8SImode);
40105 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
40106 break;
40107 case IX86_BUILTIN_GATHER3DIV8SF:
40108 case IX86_BUILTIN_GATHERDIV8SF:
40109 if (target == NULL_RTX)
40110 target = gen_reg_rtx (V4SFmode);
40111 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
40112 break;
40113 case IX86_BUILTIN_GATHER3DIV8SI:
40114 case IX86_BUILTIN_GATHERDIV8SI:
40115 if (target == NULL_RTX)
40116 target = gen_reg_rtx (V4SImode);
40117 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
40118 break;
40119 default:
40120 target = subtarget;
40121 break;
40123 return target;
40125 scatter_gen:
40126 arg0 = CALL_EXPR_ARG (exp, 0);
40127 arg1 = CALL_EXPR_ARG (exp, 1);
40128 arg2 = CALL_EXPR_ARG (exp, 2);
40129 arg3 = CALL_EXPR_ARG (exp, 3);
40130 arg4 = CALL_EXPR_ARG (exp, 4);
40131 op0 = expand_normal (arg0);
40132 op1 = expand_normal (arg1);
40133 op2 = expand_normal (arg2);
40134 op3 = expand_normal (arg3);
40135 op4 = expand_normal (arg4);
40136 mode1 = insn_data[icode].operand[1].mode;
40137 mode2 = insn_data[icode].operand[2].mode;
40138 mode3 = insn_data[icode].operand[3].mode;
40139 mode4 = insn_data[icode].operand[4].mode;
40141 /* Force memory operand only with base register here. But we
40142 don't want to do it on memory operand for other builtin
40143 functions. */
40144 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
40146 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40147 op0 = copy_to_mode_reg (Pmode, op0);
40149 op1 = fixup_modeless_constant (op1, mode1);
40151 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
40153 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40154 op1 = copy_to_mode_reg (mode1, op1);
40156 else
40158 op1 = copy_to_reg (op1);
40159 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
40162 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40163 op2 = copy_to_mode_reg (mode2, op2);
40165 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40166 op3 = copy_to_mode_reg (mode3, op3);
40168 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40170 error ("the last argument must be scale 1, 2, 4, 8");
40171 return const0_rtx;
40174 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40175 if (! pat)
40176 return const0_rtx;
40178 emit_insn (pat);
40179 return 0;
40181 vec_prefetch_gen:
40182 arg0 = CALL_EXPR_ARG (exp, 0);
40183 arg1 = CALL_EXPR_ARG (exp, 1);
40184 arg2 = CALL_EXPR_ARG (exp, 2);
40185 arg3 = CALL_EXPR_ARG (exp, 3);
40186 arg4 = CALL_EXPR_ARG (exp, 4);
40187 op0 = expand_normal (arg0);
40188 op1 = expand_normal (arg1);
40189 op2 = expand_normal (arg2);
40190 op3 = expand_normal (arg3);
40191 op4 = expand_normal (arg4);
40192 mode0 = insn_data[icode].operand[0].mode;
40193 mode1 = insn_data[icode].operand[1].mode;
40194 mode3 = insn_data[icode].operand[3].mode;
40195 mode4 = insn_data[icode].operand[4].mode;
40197 op0 = fixup_modeless_constant (op0, mode0);
40199 if (GET_MODE (op0) == mode0
40200 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
40202 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40203 op0 = copy_to_mode_reg (mode0, op0);
40205 else if (op0 != constm1_rtx)
40207 op0 = copy_to_reg (op0);
40208 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40211 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40212 op1 = copy_to_mode_reg (mode1, op1);
40214 /* Force memory operand only with base register here. But we
40215 don't want to do it on memory operand for other builtin
40216 functions. */
40217 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40219 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40220 op2 = copy_to_mode_reg (Pmode, op2);
40222 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40224 error ("the forth argument must be scale 1, 2, 4, 8");
40225 return const0_rtx;
40228 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40230 error ("incorrect hint operand");
40231 return const0_rtx;
40234 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40235 if (! pat)
40236 return const0_rtx;
40238 emit_insn (pat);
40240 return 0;
40242 case IX86_BUILTIN_XABORT:
40243 icode = CODE_FOR_xabort;
40244 arg0 = CALL_EXPR_ARG (exp, 0);
40245 op0 = expand_normal (arg0);
40246 mode0 = insn_data[icode].operand[0].mode;
40247 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40249 error ("the xabort's argument must be an 8-bit immediate");
40250 return const0_rtx;
40252 emit_insn (gen_xabort (op0));
40253 return 0;
40255 default:
40256 break;
40259 for (i = 0, d = bdesc_special_args;
40260 i < ARRAY_SIZE (bdesc_special_args);
40261 i++, d++)
40262 if (d->code == fcode)
40263 return ix86_expand_special_args_builtin (d, exp, target);
40265 for (i = 0, d = bdesc_args;
40266 i < ARRAY_SIZE (bdesc_args);
40267 i++, d++)
40268 if (d->code == fcode)
40269 switch (fcode)
40271 case IX86_BUILTIN_FABSQ:
40272 case IX86_BUILTIN_COPYSIGNQ:
40273 if (!TARGET_SSE)
40274 /* Emit a normal call if SSE isn't available. */
40275 return expand_call (exp, target, ignore);
40276 default:
40277 return ix86_expand_args_builtin (d, exp, target);
40280 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40281 if (d->code == fcode)
40282 return ix86_expand_sse_comi (d, exp, target);
40284 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40285 if (d->code == fcode)
40286 return ix86_expand_round_builtin (d, exp, target);
40288 for (i = 0, d = bdesc_pcmpestr;
40289 i < ARRAY_SIZE (bdesc_pcmpestr);
40290 i++, d++)
40291 if (d->code == fcode)
40292 return ix86_expand_sse_pcmpestr (d, exp, target);
40294 for (i = 0, d = bdesc_pcmpistr;
40295 i < ARRAY_SIZE (bdesc_pcmpistr);
40296 i++, d++)
40297 if (d->code == fcode)
40298 return ix86_expand_sse_pcmpistr (d, exp, target);
40300 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40301 if (d->code == fcode)
40302 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40303 (enum ix86_builtin_func_type)
40304 d->flag, d->comparison);
40306 gcc_unreachable ();
40309 /* This returns the target-specific builtin with code CODE if
40310 current_function_decl has visibility on this builtin, which is checked
40311 using isa flags. Returns NULL_TREE otherwise. */
40313 static tree ix86_get_builtin (enum ix86_builtins code)
40315 struct cl_target_option *opts;
40316 tree target_tree = NULL_TREE;
40318 /* Determine the isa flags of current_function_decl. */
40320 if (current_function_decl)
40321 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40323 if (target_tree == NULL)
40324 target_tree = target_option_default_node;
40326 opts = TREE_TARGET_OPTION (target_tree);
40328 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40329 return ix86_builtin_decl (code, true);
40330 else
40331 return NULL_TREE;
40334 /* Return function decl for target specific builtin
40335 for the given MPX builtin passed in FCODE. */
40336 static tree
40337 ix86_builtin_mpx_function (unsigned fcode)
40339 switch (fcode)
40341 case BUILT_IN_CHKP_BNDMK:
40342 return ix86_builtins[IX86_BUILTIN_BNDMK];
40344 case BUILT_IN_CHKP_BNDSTX:
40345 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40347 case BUILT_IN_CHKP_BNDLDX:
40348 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40350 case BUILT_IN_CHKP_BNDCL:
40351 return ix86_builtins[IX86_BUILTIN_BNDCL];
40353 case BUILT_IN_CHKP_BNDCU:
40354 return ix86_builtins[IX86_BUILTIN_BNDCU];
40356 case BUILT_IN_CHKP_BNDRET:
40357 return ix86_builtins[IX86_BUILTIN_BNDRET];
40359 case BUILT_IN_CHKP_INTERSECT:
40360 return ix86_builtins[IX86_BUILTIN_BNDINT];
40362 case BUILT_IN_CHKP_NARROW:
40363 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40365 case BUILT_IN_CHKP_SIZEOF:
40366 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40368 case BUILT_IN_CHKP_EXTRACT_LOWER:
40369 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40371 case BUILT_IN_CHKP_EXTRACT_UPPER:
40372 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40374 default:
40375 return NULL_TREE;
40378 gcc_unreachable ();
40381 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40383 Return an address to be used to load/store bounds for pointer
40384 passed in SLOT.
40386 SLOT_NO is an integer constant holding number of a target
40387 dependent special slot to be used in case SLOT is not a memory.
40389 SPECIAL_BASE is a pointer to be used as a base of fake address
40390 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40391 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
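/* Worked example (a sketch, assuming a 64-bit Pmode, i.e. an 8-byte
   pointer size): SLOT_NO 0 yields the fake address SPECIAL_BASE - 8,
   SLOT_NO 1 yields SPECIAL_BASE - 16, and so on, matching the
   plus_constant computation below.  */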
40393 static rtx
40394 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40396 rtx addr = NULL;
40398 /* NULL slot means we pass bounds for pointer not passed to the
40399 function at all. Register slot means we pass pointer in a
40400 register. In both these cases bounds are passed via Bounds
40401 Table. Since we do not have actual pointer stored in memory,
40402 we have to use fake addresses to access Bounds Table. We
40403 start with (special_base - sizeof (void*)) and decrease this
40404 address by pointer size to get addresses for other slots. */
40405 if (!slot || REG_P (slot))
40407 gcc_assert (CONST_INT_P (slot_no));
40408 addr = plus_constant (Pmode, special_base,
40409 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40411 /* If pointer is passed in a memory then its address is used to
40412 access Bounds Table. */
40413 else if (MEM_P (slot))
40415 addr = XEXP (slot, 0);
40416 if (!register_operand (addr, Pmode))
40417 addr = copy_addr_to_reg (addr);
40419 else
40420 gcc_unreachable ();
40422 return addr;
40425 /* Expand pass uses this hook to load bounds for function parameter
40426 PTR passed in SLOT in case its bounds are not passed in a register.
40428 If SLOT is a memory, then bounds are loaded as for regular pointer
40429 loaded from memory. PTR may be NULL in case SLOT is a memory.
40430 In such case value of PTR (if required) may be loaded from SLOT.
40432 If SLOT is NULL or a register then SLOT_NO is an integer constant
40433 holding number of the target dependent special slot which should be
40434 used to obtain bounds.
40436 Return loaded bounds. */
40438 static rtx
40439 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40441 rtx reg = gen_reg_rtx (BNDmode);
40442 rtx addr;
40444 /* Get address to be used to access Bounds Table. Special slots start
40445 at the location of return address of the current function. */
40446 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40448 /* Load pointer value from a memory if we don't have it. */
40449 if (!ptr)
40451 gcc_assert (MEM_P (slot));
40452 ptr = copy_addr_to_reg (slot);
40455 if (!register_operand (ptr, Pmode))
40456 ptr = ix86_zero_extend_to_Pmode (ptr);
40458 emit_insn (BNDmode == BND64mode
40459 ? gen_bnd64_ldx (reg, addr, ptr)
40460 : gen_bnd32_ldx (reg, addr, ptr));
40462 return reg;
40465 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40466 passed in SLOT in case BOUNDS are not passed in a register.
40468 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40469 stored in memory. PTR may be NULL in case SLOT is a memory.
40470 In such case value of PTR (if required) may be loaded from SLOT.
40472 If SLOT is NULL or a register then SLOT_NO is an integer constant
40473 holding number of the target dependent special slot which should be
40474 used to store BOUNDS. */
40476 static void
40477 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40479 rtx addr;
40481 /* Get address to be used to access Bounds Table. Special slots start
40482 at the location of return address of a called function. */
40483 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40485 /* Load pointer value from a memory if we don't have it. */
40486 if (!ptr)
40488 gcc_assert (MEM_P (slot));
40489 ptr = copy_addr_to_reg (slot);
40492 if (!register_operand (ptr, Pmode))
40493 ptr = ix86_zero_extend_to_Pmode (ptr);
40495 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40496 if (!register_operand (bounds, BNDmode))
40497 bounds = copy_to_mode_reg (BNDmode, bounds);
40499 emit_insn (BNDmode == BND64mode
40500 ? gen_bnd64_stx (addr, ptr, bounds)
40501 : gen_bnd32_stx (addr, ptr, bounds));
40504 /* Load and return bounds returned by function in SLOT. */
40506 static rtx
40507 ix86_load_returned_bounds (rtx slot)
40509 rtx res;
40511 gcc_assert (REG_P (slot));
40512 res = gen_reg_rtx (BNDmode);
40513 emit_move_insn (res, slot);
40515 return res;
40518 /* Store BOUNDS returned by function into SLOT. */
40520 static void
40521 ix86_store_returned_bounds (rtx slot, rtx bounds)
40523 gcc_assert (REG_P (slot));
40524 emit_move_insn (slot, bounds);
40527 /* Returns a function decl for a vectorized version of the builtin function
40528 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40529 if it is not available. */
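/* Illustrative example (a sketch, not an exhaustive list): a request
   to vectorize BUILT_IN_SQRT with V2DF output and input maps to
   IX86_BUILTIN_SQRTPD below, and with V4DF to IX86_BUILTIN_SQRTPD256,
   provided ix86_get_builtin reports the ISA as enabled for the
   current function.  */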
40531 static tree
40532 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40533 tree type_in)
40535 machine_mode in_mode, out_mode;
40536 int in_n, out_n;
40537 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40539 if (TREE_CODE (type_out) != VECTOR_TYPE
40540 || TREE_CODE (type_in) != VECTOR_TYPE
40541 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40542 return NULL_TREE;
40544 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40545 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40546 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40547 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40549 switch (fn)
40551 case BUILT_IN_SQRT:
40552 if (out_mode == DFmode && in_mode == DFmode)
40554 if (out_n == 2 && in_n == 2)
40555 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40556 else if (out_n == 4 && in_n == 4)
40557 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40558 else if (out_n == 8 && in_n == 8)
40559 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40561 break;
40563 case BUILT_IN_EXP2F:
40564 if (out_mode == SFmode && in_mode == SFmode)
40566 if (out_n == 16 && in_n == 16)
40567 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40569 break;
40571 case BUILT_IN_SQRTF:
40572 if (out_mode == SFmode && in_mode == SFmode)
40574 if (out_n == 4 && in_n == 4)
40575 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40576 else if (out_n == 8 && in_n == 8)
40577 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40578 else if (out_n == 16 && in_n == 16)
40579 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40581 break;
40583 case BUILT_IN_IFLOOR:
40584 case BUILT_IN_LFLOOR:
40585 case BUILT_IN_LLFLOOR:
40586 /* The round insn does not trap on denormals. */
40587 if (flag_trapping_math || !TARGET_ROUND)
40588 break;
40590 if (out_mode == SImode && in_mode == DFmode)
40592 if (out_n == 4 && in_n == 2)
40593 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40594 else if (out_n == 8 && in_n == 4)
40595 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40596 else if (out_n == 16 && in_n == 8)
40597 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40599 break;
40601 case BUILT_IN_IFLOORF:
40602 case BUILT_IN_LFLOORF:
40603 case BUILT_IN_LLFLOORF:
40604 /* The round insn does not trap on denormals. */
40605 if (flag_trapping_math || !TARGET_ROUND)
40606 break;
40608 if (out_mode == SImode && in_mode == SFmode)
40610 if (out_n == 4 && in_n == 4)
40611 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40612 else if (out_n == 8 && in_n == 8)
40613 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40615 break;
40617 case BUILT_IN_ICEIL:
40618 case BUILT_IN_LCEIL:
40619 case BUILT_IN_LLCEIL:
40620 /* The round insn does not trap on denormals. */
40621 if (flag_trapping_math || !TARGET_ROUND)
40622 break;
40624 if (out_mode == SImode && in_mode == DFmode)
40626 if (out_n == 4 && in_n == 2)
40627 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40628 else if (out_n == 8 && in_n == 4)
40629 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40630 else if (out_n == 16 && in_n == 8)
40631 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40633 break;
40635 case BUILT_IN_ICEILF:
40636 case BUILT_IN_LCEILF:
40637 case BUILT_IN_LLCEILF:
40638 /* The round insn does not trap on denormals. */
40639 if (flag_trapping_math || !TARGET_ROUND)
40640 break;
40642 if (out_mode == SImode && in_mode == SFmode)
40644 if (out_n == 4 && in_n == 4)
40645 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40646 else if (out_n == 8 && in_n == 8)
40647 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40649 break;
40651 case BUILT_IN_IRINT:
40652 case BUILT_IN_LRINT:
40653 case BUILT_IN_LLRINT:
40654 if (out_mode == SImode && in_mode == DFmode)
40656 if (out_n == 4 && in_n == 2)
40657 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40658 else if (out_n == 8 && in_n == 4)
40659 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40661 break;
40663 case BUILT_IN_IRINTF:
40664 case BUILT_IN_LRINTF:
40665 case BUILT_IN_LLRINTF:
40666 if (out_mode == SImode && in_mode == SFmode)
40668 if (out_n == 4 && in_n == 4)
40669 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40670 else if (out_n == 8 && in_n == 8)
40671 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40673 break;
40675 case BUILT_IN_IROUND:
40676 case BUILT_IN_LROUND:
40677 case BUILT_IN_LLROUND:
40678 /* The round insn does not trap on denormals. */
40679 if (flag_trapping_math || !TARGET_ROUND)
40680 break;
40682 if (out_mode == SImode && in_mode == DFmode)
40684 if (out_n == 4 && in_n == 2)
40685 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40686 else if (out_n == 8 && in_n == 4)
40687 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40688 else if (out_n == 16 && in_n == 8)
40689 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40691 break;
40693 case BUILT_IN_IROUNDF:
40694 case BUILT_IN_LROUNDF:
40695 case BUILT_IN_LLROUNDF:
40696 /* The round insn does not trap on denormals. */
40697 if (flag_trapping_math || !TARGET_ROUND)
40698 break;
40700 if (out_mode == SImode && in_mode == SFmode)
40702 if (out_n == 4 && in_n == 4)
40703 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40704 else if (out_n == 8 && in_n == 8)
40705 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40707 break;
40709 case BUILT_IN_COPYSIGN:
40710 if (out_mode == DFmode && in_mode == DFmode)
40712 if (out_n == 2 && in_n == 2)
40713 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40714 else if (out_n == 4 && in_n == 4)
40715 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40716 else if (out_n == 8 && in_n == 8)
40717 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40719 break;
40721 case BUILT_IN_COPYSIGNF:
40722 if (out_mode == SFmode && in_mode == SFmode)
40724 if (out_n == 4 && in_n == 4)
40725 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40726 else if (out_n == 8 && in_n == 8)
40727 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40728 else if (out_n == 16 && in_n == 16)
40729 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40731 break;
40733 case BUILT_IN_FLOOR:
40734 /* The round insn does not trap on denormals. */
40735 if (flag_trapping_math || !TARGET_ROUND)
40736 break;
40738 if (out_mode == DFmode && in_mode == DFmode)
40740 if (out_n == 2 && in_n == 2)
40741 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40742 else if (out_n == 4 && in_n == 4)
40743 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40745 break;
40747 case BUILT_IN_FLOORF:
40748 /* The round insn does not trap on denormals. */
40749 if (flag_trapping_math || !TARGET_ROUND)
40750 break;
40752 if (out_mode == SFmode && in_mode == SFmode)
40754 if (out_n == 4 && in_n == 4)
40755 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40756 else if (out_n == 8 && in_n == 8)
40757 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40759 break;
40761 case BUILT_IN_CEIL:
40762 /* The round insn does not trap on denormals. */
40763 if (flag_trapping_math || !TARGET_ROUND)
40764 break;
40766 if (out_mode == DFmode && in_mode == DFmode)
40768 if (out_n == 2 && in_n == 2)
40769 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40770 else if (out_n == 4 && in_n == 4)
40771 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40773 break;
40775 case BUILT_IN_CEILF:
40776 /* The round insn does not trap on denormals. */
40777 if (flag_trapping_math || !TARGET_ROUND)
40778 break;
40780 if (out_mode == SFmode && in_mode == SFmode)
40782 if (out_n == 4 && in_n == 4)
40783 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40784 else if (out_n == 8 && in_n == 8)
40785 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40787 break;
40789 case BUILT_IN_TRUNC:
40790 /* The round insn does not trap on denormals. */
40791 if (flag_trapping_math || !TARGET_ROUND)
40792 break;
40794 if (out_mode == DFmode && in_mode == DFmode)
40796 if (out_n == 2 && in_n == 2)
40797 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40798 else if (out_n == 4 && in_n == 4)
40799 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40801 break;
40803 case BUILT_IN_TRUNCF:
40804 /* The round insn does not trap on denormals. */
40805 if (flag_trapping_math || !TARGET_ROUND)
40806 break;
40808 if (out_mode == SFmode && in_mode == SFmode)
40810 if (out_n == 4 && in_n == 4)
40811 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40812 else if (out_n == 8 && in_n == 8)
40813 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40815 break;
40817 case BUILT_IN_RINT:
40818 /* The round insn does not trap on denormals. */
40819 if (flag_trapping_math || !TARGET_ROUND)
40820 break;
40822 if (out_mode == DFmode && in_mode == DFmode)
40824 if (out_n == 2 && in_n == 2)
40825 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40826 else if (out_n == 4 && in_n == 4)
40827 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40829 break;
40831 case BUILT_IN_RINTF:
40832 /* The round insn does not trap on denormals. */
40833 if (flag_trapping_math || !TARGET_ROUND)
40834 break;
40836 if (out_mode == SFmode && in_mode == SFmode)
40838 if (out_n == 4 && in_n == 4)
40839 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40840 else if (out_n == 8 && in_n == 8)
40841 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40843 break;
40845 case BUILT_IN_ROUND:
40846 /* The round insn does not trap on denormals. */
40847 if (flag_trapping_math || !TARGET_ROUND)
40848 break;
40850 if (out_mode == DFmode && in_mode == DFmode)
40852 if (out_n == 2 && in_n == 2)
40853 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40854 else if (out_n == 4 && in_n == 4)
40855 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40857 break;
40859 case BUILT_IN_ROUNDF:
40860 /* The round insn does not trap on denormals. */
40861 if (flag_trapping_math || !TARGET_ROUND)
40862 break;
40864 if (out_mode == SFmode && in_mode == SFmode)
40866 if (out_n == 4 && in_n == 4)
40867 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40868 else if (out_n == 8 && in_n == 8)
40869 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40871 break;
40873 case BUILT_IN_FMA:
40874 if (out_mode == DFmode && in_mode == DFmode)
40876 if (out_n == 2 && in_n == 2)
40877 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40878 if (out_n == 4 && in_n == 4)
40879 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40881 break;
40883 case BUILT_IN_FMAF:
40884 if (out_mode == SFmode && in_mode == SFmode)
40886 if (out_n == 4 && in_n == 4)
40887 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40888 if (out_n == 8 && in_n == 8)
40889 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40891 break;
40893 default:
40894 break;
40897 /* Dispatch to a handler for a vectorization library. */
40898 if (ix86_veclib_handler)
40899 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40900 type_in);
40902 return NULL_TREE;
40905 /* Handler for an SVML-style interface to
40906 a library with vectorized intrinsics. */
40908 static tree
40909 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40911 char name[20];
40912 tree fntype, new_fndecl, args;
40913 unsigned arity;
40914 const char *bname;
40915 machine_mode el_mode, in_mode;
40916 int n, in_n;
40918 /* The SVML is suitable for unsafe math only. */
40919 if (!flag_unsafe_math_optimizations)
40920 return NULL_TREE;
40922 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40923 n = TYPE_VECTOR_SUBPARTS (type_out);
40924 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40925 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40926 if (el_mode != in_mode
40927 || n != in_n)
40928 return NULL_TREE;
40930 switch (fn)
40932 case BUILT_IN_EXP:
40933 case BUILT_IN_LOG:
40934 case BUILT_IN_LOG10:
40935 case BUILT_IN_POW:
40936 case BUILT_IN_TANH:
40937 case BUILT_IN_TAN:
40938 case BUILT_IN_ATAN:
40939 case BUILT_IN_ATAN2:
40940 case BUILT_IN_ATANH:
40941 case BUILT_IN_CBRT:
40942 case BUILT_IN_SINH:
40943 case BUILT_IN_SIN:
40944 case BUILT_IN_ASINH:
40945 case BUILT_IN_ASIN:
40946 case BUILT_IN_COSH:
40947 case BUILT_IN_COS:
40948 case BUILT_IN_ACOSH:
40949 case BUILT_IN_ACOS:
40950 if (el_mode != DFmode || n != 2)
40951 return NULL_TREE;
40952 break;
40954 case BUILT_IN_EXPF:
40955 case BUILT_IN_LOGF:
40956 case BUILT_IN_LOG10F:
40957 case BUILT_IN_POWF:
40958 case BUILT_IN_TANHF:
40959 case BUILT_IN_TANF:
40960 case BUILT_IN_ATANF:
40961 case BUILT_IN_ATAN2F:
40962 case BUILT_IN_ATANHF:
40963 case BUILT_IN_CBRTF:
40964 case BUILT_IN_SINHF:
40965 case BUILT_IN_SINF:
40966 case BUILT_IN_ASINHF:
40967 case BUILT_IN_ASINF:
40968 case BUILT_IN_COSHF:
40969 case BUILT_IN_COSF:
40970 case BUILT_IN_ACOSHF:
40971 case BUILT_IN_ACOSF:
40972 if (el_mode != SFmode || n != 4)
40973 return NULL_TREE;
40974 break;
40976 default:
40977 return NULL_TREE;
40980 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
40982 if (fn == BUILT_IN_LOGF)
40983 strcpy (name, "vmlsLn4");
40984 else if (fn == BUILT_IN_LOG)
40985 strcpy (name, "vmldLn2");
40986 else if (n == 4)
40988 sprintf (name, "vmls%s", bname+10);
40989 name[strlen (name)-1] = '4';
40991 else
40992 sprintf (name, "vmld%s2", bname+10);
40994 /* Convert to uppercase. */
40995 name[4] &= ~0x20;
40997 arity = 0;
40998 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
40999 args;
41000 args = TREE_CHAIN (args))
41001 arity++;
41003 if (arity == 1)
41004 fntype = build_function_type_list (type_out, type_in, NULL);
41005 else
41006 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41008 /* Build a function declaration for the vectorized function. */
41009 new_fndecl = build_decl (BUILTINS_LOCATION,
41010 FUNCTION_DECL, get_identifier (name), fntype);
41011 TREE_PUBLIC (new_fndecl) = 1;
41012 DECL_EXTERNAL (new_fndecl) = 1;
41013 DECL_IS_NOVOPS (new_fndecl) = 1;
41014 TREE_READONLY (new_fndecl) = 1;
41016 return new_fndecl;
41019 /* Handler for an ACML-style interface to
41020 a library with vectorized intrinsics. */
41022 static tree
41023 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
41025 char name[20] = "__vr.._";
41026 tree fntype, new_fndecl, args;
41027 unsigned arity;
41028 const char *bname;
41029 machine_mode el_mode, in_mode;
41030 int n, in_n;
41032 /* The ACML is 64-bit only and suitable for unsafe math only, as
41033 it does not correctly support parts of IEEE with the required
41034 precision, such as denormals. */
41035 if (!TARGET_64BIT
41036 || !flag_unsafe_math_optimizations)
41037 return NULL_TREE;
41039 el_mode = TYPE_MODE (TREE_TYPE (type_out));
41040 n = TYPE_VECTOR_SUBPARTS (type_out);
41041 in_mode = TYPE_MODE (TREE_TYPE (type_in));
41042 in_n = TYPE_VECTOR_SUBPARTS (type_in);
41043 if (el_mode != in_mode
41044 || n != in_n)
41045 return NULL_TREE;
41047 switch (fn)
41049 case BUILT_IN_SIN:
41050 case BUILT_IN_COS:
41051 case BUILT_IN_EXP:
41052 case BUILT_IN_LOG:
41053 case BUILT_IN_LOG2:
41054 case BUILT_IN_LOG10:
41055 name[4] = 'd';
41056 name[5] = '2';
41057 if (el_mode != DFmode
41058 || n != 2)
41059 return NULL_TREE;
41060 break;
41062 case BUILT_IN_SINF:
41063 case BUILT_IN_COSF:
41064 case BUILT_IN_EXPF:
41065 case BUILT_IN_POWF:
41066 case BUILT_IN_LOGF:
41067 case BUILT_IN_LOG2F:
41068 case BUILT_IN_LOG10F:
41069 name[4] = 's';
41070 name[5] = '4';
41071 if (el_mode != SFmode
41072 || n != 4)
41073 return NULL_TREE;
41074 break;
41076 default:
41077 return NULL_TREE;
41080 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41081 sprintf (name + 7, "%s", bname+10);
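/* Worked example (informal sketch): for BUILT_IN_SIN the "__vr.._"
   template becomes "__vrd2_" and appending bname+10 ("sin") gives
   "__vrd2_sin"; for BUILT_IN_SINF the result is "__vrs4_sinf".  */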
41083 arity = 0;
41084 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41085 args;
41086 args = TREE_CHAIN (args))
41087 arity++;
41089 if (arity == 1)
41090 fntype = build_function_type_list (type_out, type_in, NULL);
41091 else
41092 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41094 /* Build a function declaration for the vectorized function. */
41095 new_fndecl = build_decl (BUILTINS_LOCATION,
41096 FUNCTION_DECL, get_identifier (name), fntype);
41097 TREE_PUBLIC (new_fndecl) = 1;
41098 DECL_EXTERNAL (new_fndecl) = 1;
41099 DECL_IS_NOVOPS (new_fndecl) = 1;
41100 TREE_READONLY (new_fndecl) = 1;
41102 return new_fndecl;
41105 /* Returns a decl of a function that implements gather load with
41106 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
41107 Return NULL_TREE if it is not available. */
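/* Usage sketch (an illustration of the mapping below, not a complete
   table): with AVX2 but without AVX512VL, a gather from a V4DF memory
   vectype with an SImode index type and scale 8 resolves to
   IX86_BUILTIN_GATHERALTSIV4DF, while the same gather with a DImode
   index type resolves to IX86_BUILTIN_GATHERDIV4DF.  */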
41109 static tree
41110 ix86_vectorize_builtin_gather (const_tree mem_vectype,
41111 const_tree index_type, int scale)
41113 bool si;
41114 enum ix86_builtins code;
41116 if (! TARGET_AVX2)
41117 return NULL_TREE;
41119 if ((TREE_CODE (index_type) != INTEGER_TYPE
41120 && !POINTER_TYPE_P (index_type))
41121 || (TYPE_MODE (index_type) != SImode
41122 && TYPE_MODE (index_type) != DImode))
41123 return NULL_TREE;
41125 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
41126 return NULL_TREE;
41128 /* v*gather* insn sign extends index to pointer mode. */
41129 if (TYPE_PRECISION (index_type) < POINTER_SIZE
41130 && TYPE_UNSIGNED (index_type))
41131 return NULL_TREE;
41133 if (scale <= 0
41134 || scale > 8
41135 || (scale & (scale - 1)) != 0)
41136 return NULL_TREE;
41138 si = TYPE_MODE (index_type) == SImode;
41139 switch (TYPE_MODE (mem_vectype))
41141 case V2DFmode:
41142 if (TARGET_AVX512VL)
41143 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
41144 else
41145 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
41146 break;
41147 case V4DFmode:
41148 if (TARGET_AVX512VL)
41149 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
41150 else
41151 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
41152 break;
41153 case V2DImode:
41154 if (TARGET_AVX512VL)
41155 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
41156 else
41157 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
41158 break;
41159 case V4DImode:
41160 if (TARGET_AVX512VL)
41161 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
41162 else
41163 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
41164 break;
41165 case V4SFmode:
41166 if (TARGET_AVX512VL)
41167 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
41168 else
41169 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
41170 break;
41171 case V8SFmode:
41172 if (TARGET_AVX512VL)
41173 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41174 else
41175 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41176 break;
41177 case V4SImode:
41178 if (TARGET_AVX512VL)
41179 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41180 else
41181 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41182 break;
41183 case V8SImode:
41184 if (TARGET_AVX512VL)
41185 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41186 else
41187 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41188 break;
41189 case V8DFmode:
41190 if (TARGET_AVX512F)
41191 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41192 else
41193 return NULL_TREE;
41194 break;
41195 case V8DImode:
41196 if (TARGET_AVX512F)
41197 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41198 else
41199 return NULL_TREE;
41200 break;
41201 case V16SFmode:
41202 if (TARGET_AVX512F)
41203 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41204 else
41205 return NULL_TREE;
41206 break;
41207 case V16SImode:
41208 if (TARGET_AVX512F)
41209 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41210 else
41211 return NULL_TREE;
41212 break;
41213 default:
41214 return NULL_TREE;
41217 return ix86_get_builtin (code);
41220 /* Returns a code for a target-specific builtin that implements
41221 reciprocal of the function, or NULL_TREE if not available. */
41223 static tree
41224 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41226 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41227 && flag_finite_math_only && !flag_trapping_math
41228 && flag_unsafe_math_optimizations))
41229 return NULL_TREE;
41231 if (md_fn)
41232 /* Machine dependent builtins. */
41233 switch (fn)
41235 /* Vectorized version of sqrt to rsqrt conversion. */
41236 case IX86_BUILTIN_SQRTPS_NR:
41237 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41239 case IX86_BUILTIN_SQRTPS_NR256:
41240 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41242 default:
41243 return NULL_TREE;
41245 else
41246 /* Normal builtins. */
41247 switch (fn)
41249 /* Sqrt to rsqrt conversion. */
41250 case BUILT_IN_SQRTF:
41251 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41253 default:
41254 return NULL_TREE;
41258 /* Helper for avx_vpermilps256_operand et al. This is also used by
41259 the expansion functions to turn the parallel back into a mask.
41260 The return value is 0 for no match and the imm8+1 for a match. */
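/* Worked example (a sketch for the V4SFmode case handled below): the
   parallel (1 0 3 2) gives mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1,
   so the function returns 0xb2, i.e. imm8 0xb1 plus one.  */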
41263 avx_vpermilp_parallel (rtx par, machine_mode mode)
41265 unsigned i, nelt = GET_MODE_NUNITS (mode);
41266 unsigned mask = 0;
41267 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41269 if (XVECLEN (par, 0) != (int) nelt)
41270 return 0;
41272 /* Validate that all of the elements are constants, and not totally
41273 out of range. Copy the data into an integral array to make the
41274 subsequent checks easier. */
41275 for (i = 0; i < nelt; ++i)
41277 rtx er = XVECEXP (par, 0, i);
41278 unsigned HOST_WIDE_INT ei;
41280 if (!CONST_INT_P (er))
41281 return 0;
41282 ei = INTVAL (er);
41283 if (ei >= nelt)
41284 return 0;
41285 ipar[i] = ei;
41288 switch (mode)
41290 case V8DFmode:
41291 /* In the 512-bit DFmode case, we can only move elements within
41292 a 128-bit lane. First fill the second part of the mask,
41293 then fallthru. */
41294 for (i = 4; i < 6; ++i)
41296 if (ipar[i] < 4 || ipar[i] >= 6)
41297 return 0;
41298 mask |= (ipar[i] - 4) << i;
41300 for (i = 6; i < 8; ++i)
41302 if (ipar[i] < 6)
41303 return 0;
41304 mask |= (ipar[i] - 6) << i;
41306 /* FALLTHRU */
41308 case V4DFmode:
41309 /* In the 256-bit DFmode case, we can only move elements within
41310 a 128-bit lane. */
41311 for (i = 0; i < 2; ++i)
41313 if (ipar[i] >= 2)
41314 return 0;
41315 mask |= ipar[i] << i;
41317 for (i = 2; i < 4; ++i)
41319 if (ipar[i] < 2)
41320 return 0;
41321 mask |= (ipar[i] - 2) << i;
41323 break;
41325 case V16SFmode:
41326 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41327 must mirror the permutation in the lower 256 bits. */
41328 for (i = 0; i < 8; ++i)
41329 if (ipar[i] + 8 != ipar[i + 8])
41330 return 0;
41331 /* FALLTHRU */
41333 case V8SFmode:
41334 /* In the 256-bit SFmode case, we have full freedom of
41335 movement within the low 128-bit lane, but the high 128-bit
41336 lane must mirror the exact same pattern. */
41337 for (i = 0; i < 4; ++i)
41338 if (ipar[i] + 4 != ipar[i + 4])
41339 return 0;
41340 nelt = 4;
41341 /* FALLTHRU */
41343 case V2DFmode:
41344 case V4SFmode:
41345 /* In the 128-bit case, we've full freedom in the placement of
41346 the elements from the source operand. */
41347 for (i = 0; i < nelt; ++i)
41348 mask |= ipar[i] << (i * (nelt / 2));
41349 break;
41351 default:
41352 gcc_unreachable ();
41355 /* Make sure success has a non-zero value by adding one. */
41356 return mask + 1;
41359 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41360 the expansion functions to turn the parallel back into a mask.
41361 The return value is 0 for no match and the imm8+1 for a match. */
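/* Worked example (a sketch for the V4DFmode case): the parallel
   (2 3 4 5) passes the halves check, and the reconstruction below
   computes e = 2/2 = 1 for the low half and e = 4/2 = 2 for the high
   half, giving mask 0x21 and a return value of 0x22.  */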
41364 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41366 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41367 unsigned mask = 0;
41368 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41370 if (XVECLEN (par, 0) != (int) nelt)
41371 return 0;
41373 /* Validate that all of the elements are constants, and not totally
41374 out of range. Copy the data into an integral array to make the
41375 subsequent checks easier. */
41376 for (i = 0; i < nelt; ++i)
41378 rtx er = XVECEXP (par, 0, i);
41379 unsigned HOST_WIDE_INT ei;
41381 if (!CONST_INT_P (er))
41382 return 0;
41383 ei = INTVAL (er);
41384 if (ei >= 2 * nelt)
41385 return 0;
41386 ipar[i] = ei;
41389 /* Validate that the halves of the permute are halves. */
41390 for (i = 0; i < nelt2 - 1; ++i)
41391 if (ipar[i] + 1 != ipar[i + 1])
41392 return 0;
41393 for (i = nelt2; i < nelt - 1; ++i)
41394 if (ipar[i] + 1 != ipar[i + 1])
41395 return 0;
41397 /* Reconstruct the mask. */
41398 for (i = 0; i < 2; ++i)
41400 unsigned e = ipar[i * nelt2];
41401 if (e % nelt2)
41402 return 0;
41403 e /= nelt2;
41404 mask |= e << (i * 4);
41407 /* Make sure success has a non-zero value by adding one. */
41408 return mask + 1;
41411 /* Return a register priority for hard reg REGNO. */
41412 static int
41413 ix86_register_priority (int hard_regno)
41415 /* ebp and r13 as the base always want a displacement, r12 as the
41416 base always wants an index. So discourage their usage in an
41417 address. */
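/* Summary of the resulting ranking (higher is preferred): eax gets 4,
   most registers get 3, the REX-extended integer and SSE registers
   get 2, ebp gets 1, and r12/r13 get 0.  */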
41418 if (hard_regno == R12_REG || hard_regno == R13_REG)
41419 return 0;
41420 if (hard_regno == BP_REG)
41421 return 1;
41422 /* New x86-64 int registers result in bigger code size. Discourage
41423 them. */
41424 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41425 return 2;
41426 /* New x86-64 SSE registers result in bigger code size. Discourage
41427 them. */
41428 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41429 return 2;
41430 /* Usage of AX register results in smaller code. Prefer it. */
41431 if (hard_regno == AX_REG)
41432 return 4;
41433 return 3;
41436 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41438 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41439 QImode must go into class Q_REGS.
41440 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41441 movdf to do mem-to-mem moves through integer regs. */
41443 static reg_class_t
41444 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41446 machine_mode mode = GET_MODE (x);
41448 /* We're only allowed to return a subclass of CLASS. Many of the
41449 following checks fail for NO_REGS, so eliminate that early. */
41450 if (regclass == NO_REGS)
41451 return NO_REGS;
41453 /* All classes can load zeros. */
41454 if (x == CONST0_RTX (mode))
41455 return regclass;
41457 /* Force constants into memory if we are loading a (nonzero) constant into
41458 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41459 instructions to load from a constant. */
41460 if (CONSTANT_P (x)
41461 && (MAYBE_MMX_CLASS_P (regclass)
41462 || MAYBE_SSE_CLASS_P (regclass)
41463 || MAYBE_MASK_CLASS_P (regclass)))
41464 return NO_REGS;
41466 /* Prefer SSE regs only, if we can use them for math. */
41467 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41468 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41470 /* Floating-point constants need more complex checks. */
41471 if (CONST_DOUBLE_P (x))
41473 /* General regs can load everything. */
41474 if (reg_class_subset_p (regclass, GENERAL_REGS))
41475 return regclass;
41477 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41478 zero above. We only want to wind up preferring 80387 registers if
41479 we plan on doing computation with them. */
41480 if (TARGET_80387
41481 && standard_80387_constant_p (x) > 0)
41483 /* Limit class to non-sse. */
41484 if (regclass == FLOAT_SSE_REGS)
41485 return FLOAT_REGS;
41486 if (regclass == FP_TOP_SSE_REGS)
41487 return FP_TOP_REG;
41488 if (regclass == FP_SECOND_SSE_REGS)
41489 return FP_SECOND_REG;
41490 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41491 return regclass;
41494 return NO_REGS;
41497 /* Generally when we see PLUS here, it's the function invariant
41498 (plus soft-fp const_int). Which can only be computed into general
41499 regs. */
41500 if (GET_CODE (x) == PLUS)
41501 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41503 /* QImode constants are easy to load, but non-constant QImode data
41504 must go into Q_REGS. */
41505 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41507 if (reg_class_subset_p (regclass, Q_REGS))
41508 return regclass;
41509 if (reg_class_subset_p (Q_REGS, regclass))
41510 return Q_REGS;
41511 return NO_REGS;
41514 return regclass;
41517 /* Discourage putting floating-point values in SSE registers unless
41518 SSE math is being used, and likewise for the 387 registers. */
41519 static reg_class_t
41520 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41522 machine_mode mode = GET_MODE (x);
41524 /* Restrict the output reload class to the register bank that we are doing
41525 math on. If we would like not to return a subset of CLASS, reject this
41526 alternative: if reload cannot do this, it will still use its choice. */
41527 mode = GET_MODE (x);
41528 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41529 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41531 if (X87_FLOAT_MODE_P (mode))
41533 if (regclass == FP_TOP_SSE_REGS)
41534 return FP_TOP_REG;
41535 else if (regclass == FP_SECOND_SSE_REGS)
41536 return FP_SECOND_REG;
41537 else
41538 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41541 return regclass;
41544 static reg_class_t
41545 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41546 machine_mode mode, secondary_reload_info *sri)
41548 /* Double-word spills from general registers to non-offsettable memory
41549 references (zero-extended addresses) require special handling. */
41550 if (TARGET_64BIT
41551 && MEM_P (x)
41552 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41553 && INTEGER_CLASS_P (rclass)
41554 && !offsettable_memref_p (x))
41556 sri->icode = (in_p
41557 ? CODE_FOR_reload_noff_load
41558 : CODE_FOR_reload_noff_store);
41559 /* Add the cost of moving address to a temporary. */
41560 sri->extra_cost = 1;
41562 return NO_REGS;
41565 /* QImode spills from non-QI registers require
41566 an intermediate register on 32-bit targets. */
41567 if (mode == QImode
41568 && (MAYBE_MASK_CLASS_P (rclass)
41569 || (!TARGET_64BIT && !in_p
41570 && INTEGER_CLASS_P (rclass)
41571 && MAYBE_NON_Q_CLASS_P (rclass))))
41573 int regno;
41575 if (REG_P (x))
41576 regno = REGNO (x);
41577 else
41578 regno = -1;
41580 if (regno >= FIRST_PSEUDO_REGISTER || SUBREG_P (x))
41581 regno = true_regnum (x);
41583 /* Return Q_REGS if the operand is in memory. */
41584 if (regno == -1)
41585 return Q_REGS;
41588 /* This condition handles the corner case where an expression involving
41589 pointers gets vectorized. We're trying to use the address of a
41590 stack slot as a vector initializer.
41592 (set (reg:V2DI 74 [ vect_cst_.2 ])
41593 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41595 Eventually frame gets turned into sp+offset like this:
41597 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41598 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41599 (const_int 392 [0x188]))))
41601 That later gets turned into:
41603 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41604 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41605 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41607 We'll have the following reload recorded:
41609 Reload 0: reload_in (DI) =
41610 (plus:DI (reg/f:DI 7 sp)
41611 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41612 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41613 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41614 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41615 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41616 reload_reg_rtx: (reg:V2DI 22 xmm1)
41618 Which isn't going to work since SSE instructions can't handle scalar
41619 additions. Returning GENERAL_REGS forces the addition into integer
41620 register and reload can handle subsequent reloads without problems. */
41622 if (in_p && GET_CODE (x) == PLUS
41623 && SSE_CLASS_P (rclass)
41624 && SCALAR_INT_MODE_P (mode))
41625 return GENERAL_REGS;
41627 return NO_REGS;
41630 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41632 static bool
41633 ix86_class_likely_spilled_p (reg_class_t rclass)
41635 switch (rclass)
41637 case AREG:
41638 case DREG:
41639 case CREG:
41640 case BREG:
41641 case AD_REGS:
41642 case SIREG:
41643 case DIREG:
41644 case SSE_FIRST_REG:
41645 case FP_TOP_REG:
41646 case FP_SECOND_REG:
41647 case BND_REGS:
41648 return true;
41650 default:
41651 break;
41654 return false;
41657 /* If we are copying between general and FP registers, we need a memory
41658 location. The same is true for SSE and MMX registers.
41660 To optimize register_move_cost performance, allow inline variant.
41662 The macro can't work reliably when one of the CLASSES is a class containing
41663 registers from multiple units (SSE, MMX, integer). We avoid this by never
41664 combining those units in a single alternative in the machine description.
41665 Ensure that this constraint holds to avoid unexpected surprises.
41667 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41668 enforce these sanity checks. */
41670 static inline bool
41671 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41672 machine_mode mode, int strict)
41674 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41675 return false;
41676 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41677 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41678 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41679 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41680 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41681 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41683 gcc_assert (!strict || lra_in_progress);
41684 return true;
41687 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41688 return true;
41690 /* Between mask and general, we have moves no larger than word size. */
41691 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41692 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41693 return true;
41695 /* ??? This is a lie. We do have moves between mmx/general, and for
41696 mmx/sse2. But by saying we need secondary memory we discourage the
41697 register allocator from using the mmx registers unless needed. */
41698 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41699 return true;
41701 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41703 /* SSE1 doesn't have any direct moves from other classes. */
41704 if (!TARGET_SSE2)
41705 return true;
41707 /* If the target says that inter-unit moves are more expensive
41708 than moving through memory, then don't generate them. */
41709 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41710 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41711 return true;
41713 /* Between SSE and general, we have moves no larger than word size. */
41714 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41715 return true;
41718 return false;
41721 bool
41722 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41723 machine_mode mode, int strict)
41725 return inline_secondary_memory_needed (class1, class2, mode, strict);
41728 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41730 On the 80386, this is the size of MODE in words,
41731 except in the FP regs, where a single reg is always enough. */
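/* For example (a sketch, assuming 32-bit mode with UNITS_PER_WORD of
   4): DImode in GENERAL_REGS takes 2 registers and XFmode takes 3,
   while a scalar float mode in FLOAT_REGS always takes just 1.  */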
41733 static unsigned char
41734 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41736 if (MAYBE_INTEGER_CLASS_P (rclass))
41738 if (mode == XFmode)
41739 return (TARGET_64BIT ? 2 : 3);
41740 else if (mode == XCmode)
41741 return (TARGET_64BIT ? 4 : 6);
41742 else
41743 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41745 else
41747 if (COMPLEX_MODE_P (mode))
41748 return 2;
41749 else
41750 return 1;
41754 /* Return true if the registers in CLASS cannot represent the change from
41755 modes FROM to TO. */
41757 bool
41758 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41759 enum reg_class regclass)
41761 if (from == to)
41762 return false;
41764 /* x87 registers can't do subreg at all, as all values are reformatted
41765 to extended precision. */
41766 if (MAYBE_FLOAT_CLASS_P (regclass))
41767 return true;
41769 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41771 /* Vector registers do not support QI or HImode loads. If we don't
41772 disallow a change to these modes, reload will assume it's ok to
41773 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41774 the vec_dupv4hi pattern. */
41775 if (GET_MODE_SIZE (from) < 4)
41776 return true;
41779 return false;
41782 /* Return the cost of moving data of mode M between a
41783 register and memory. A value of 2 is the default; this cost is
41784 relative to those in `REGISTER_MOVE_COST'.
41786 This function is used extensively by register_move_cost that is used to
41787 build tables at startup. Make it inline in this case.
41788 When IN is 2, return maximum of in and out move cost.
41790 If moving between registers and memory is more expensive than
41791 between two registers, you should define this macro to express the
41792 relative cost.
41794 Also model the increased cost of moving QImode registers in
41795 non-Q_REGS classes. */
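/* In short: IN == 0 asks for the store cost, IN == 1 for the load
   cost, and IN == 2 for the maximum of the two.  */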
41797 static inline int
41798 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41799 int in)
41801 int cost;
41802 if (FLOAT_CLASS_P (regclass))
41804 int index;
41805 switch (mode)
41807 case SFmode:
41808 index = 0;
41809 break;
41810 case DFmode:
41811 index = 1;
41812 break;
41813 case XFmode:
41814 index = 2;
41815 break;
41816 default:
41817 return 100;
41819 if (in == 2)
41820 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41821 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41823 if (SSE_CLASS_P (regclass))
41825 int index;
41826 switch (GET_MODE_SIZE (mode))
41828 case 4:
41829 index = 0;
41830 break;
41831 case 8:
41832 index = 1;
41833 break;
41834 case 16:
41835 index = 2;
41836 break;
41837 default:
41838 return 100;
41840 if (in == 2)
41841 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41842 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41844 if (MMX_CLASS_P (regclass))
41846 int index;
41847 switch (GET_MODE_SIZE (mode))
41849 case 4:
41850 index = 0;
41851 break;
41852 case 8:
41853 index = 1;
41854 break;
41855 default:
41856 return 100;
41858 if (in == 2)
41859 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41860 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41862 switch (GET_MODE_SIZE (mode))
41864 case 1:
41865 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41867 if (!in)
41868 return ix86_cost->int_store[0];
41869 if (TARGET_PARTIAL_REG_DEPENDENCY
41870 && optimize_function_for_speed_p (cfun))
41871 cost = ix86_cost->movzbl_load;
41872 else
41873 cost = ix86_cost->int_load[0];
41874 if (in == 2)
41875 return MAX (cost, ix86_cost->int_store[0]);
41876 return cost;
41878 else
41880 if (in == 2)
41881 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41882 if (in)
41883 return ix86_cost->movzbl_load;
41884 else
41885 return ix86_cost->int_store[0] + 4;
41887 break;
41888 case 2:
41889 if (in == 2)
41890 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41891 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41892 default:
41893 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41894 if (mode == TFmode)
41895 mode = XFmode;
41896 if (in == 2)
41897 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41898 else if (in)
41899 cost = ix86_cost->int_load[2];
41900 else
41901 cost = ix86_cost->int_store[2];
41902 return (cost * (((int) GET_MODE_SIZE (mode)
41903 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41907 static int
41908 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41909 bool in)
41911 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41915 /* Return the cost of moving data from a register in class CLASS1 to
41916 one in class CLASS2.
41918 It is not required that the cost always equal 2 when FROM is the same as TO;
41919 on some machines it is expensive to move between registers if they are not
41920 general registers. */
41922 static int
41923 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41924 reg_class_t class2_i)
41926 enum reg_class class1 = (enum reg_class) class1_i;
41927 enum reg_class class2 = (enum reg_class) class2_i;
41929 /* In case we require secondary memory, compute cost of the store followed
41930 by load. In order to avoid bad register allocation choices, we need
41931 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
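/* Informal cost sketch: when secondary memory is needed, the estimate
   below is roughly 1 + memory cost for CLASS1 + memory cost for
   CLASS2, each taken as the max of load and store (IN == 2), plus a
   penalty of 20 for the multi-store/single-load and FP/MMX overlap
   cases.  */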
41933 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41935 int cost = 1;
41937 cost += inline_memory_move_cost (mode, class1, 2);
41938 cost += inline_memory_move_cost (mode, class2, 2);
41940 /* In case of copying from a general-purpose register we may emit multiple
41941 stores followed by a single load, causing a memory size mismatch stall.
41942 Count this as an arbitrarily high cost of 20. */
41943 if (targetm.class_max_nregs (class1, mode)
41944 > targetm.class_max_nregs (class2, mode))
41945 cost += 20;
41947 /* In the case of FP/MMX moves, the registers actually overlap, and we
41948 have to switch modes in order to treat them differently. */
41949 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41950 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41951 cost += 20;
41953 return cost;
41956 /* Moves between SSE/MMX and integer unit are expensive. */
41957 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41958 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41960 /* ??? By keeping the returned value relatively high, we limit the number
41961 of moves between integer and MMX/SSE registers for all targets.
41962 Additionally, a high value prevents a problem with x86_modes_tieable_p (),
41963 where integer modes in MMX/SSE registers are not tieable
41964 because of missing QImode and HImode moves to, from or between
41965 MMX/SSE registers. */
41966 return MAX (8, ix86_cost->mmxsse_to_integer);
41968 if (MAYBE_FLOAT_CLASS_P (class1))
41969 return ix86_cost->fp_move;
41970 if (MAYBE_SSE_CLASS_P (class1))
41971 return ix86_cost->sse_move;
41972 if (MAYBE_MMX_CLASS_P (class1))
41973 return ix86_cost->mmx_move;
41974 return 2;
41977 /* Return TRUE if hard register REGNO can hold a value of machine-mode
41978 MODE. */
41980 bool
41981 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
41983 /* The flags register, and only the flags register, can hold CCmode values. */
41984 if (CC_REGNO_P (regno))
41985 return GET_MODE_CLASS (mode) == MODE_CC;
41986 if (GET_MODE_CLASS (mode) == MODE_CC
41987 || GET_MODE_CLASS (mode) == MODE_RANDOM
41988 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
41989 return false;
41990 if (STACK_REGNO_P (regno))
41991 return VALID_FP_MODE_P (mode);
41992 if (MASK_REGNO_P (regno))
41993 return (VALID_MASK_REG_MODE (mode)
41994 || (TARGET_AVX512BW
41995 && VALID_MASK_AVX512BW_MODE (mode)));
41996 if (BND_REGNO_P (regno))
41997 return VALID_BND_REG_MODE (mode);
41998 if (SSE_REGNO_P (regno))
42000 /* We implement the move patterns for all vector modes into and
42001 out of SSE registers, even when no operation instructions
42002 are available. */
42004 /* For AVX-512 we allow, regardless of regno:
42005 - XI mode
42006 - any 512-bit wide vector mode
42007 - any scalar mode. */
42008 if (TARGET_AVX512F
42009 && (mode == XImode
42010 || VALID_AVX512F_REG_MODE (mode)
42011 || VALID_AVX512F_SCALAR_MODE (mode)))
42012 return true;
42014 /* TODO: check for QI/HI scalars. */
42015 /* AVX512VL allows SSE registers xmm16+ for 128/256-bit modes. */
42016 if (TARGET_AVX512VL
42017 && (mode == OImode
42018 || mode == TImode
42019 || VALID_AVX256_REG_MODE (mode)
42020 || VALID_AVX512VL_128_REG_MODE (mode)))
42021 return true;
42023 /* xmm16-xmm31 are only available for AVX-512. */
42024 if (EXT_REX_SSE_REGNO_P (regno))
42025 return false;
42027 /* OImode and AVX modes are available only when AVX is enabled. */
42028 return ((TARGET_AVX
42029 && VALID_AVX256_REG_OR_OI_MODE (mode))
42030 || VALID_SSE_REG_MODE (mode)
42031 || VALID_SSE2_REG_MODE (mode)
42032 || VALID_MMX_REG_MODE (mode)
42033 || VALID_MMX_REG_MODE_3DNOW (mode));
42035 if (MMX_REGNO_P (regno))
42037 /* We implement the move patterns for 3DNOW modes even in MMX mode,
42038 so if the register is available at all, then we can move data of
42039 the given mode into or out of it. */
42040 return (VALID_MMX_REG_MODE (mode)
42041 || VALID_MMX_REG_MODE_3DNOW (mode));
42044 if (mode == QImode)
42046 /* Take care with QImode values - they can be in non-QI regs,
42047 but then they do cause partial register stalls. */
42048 if (ANY_QI_REGNO_P (regno))
42049 return true;
42050 if (!TARGET_PARTIAL_REG_STALL)
42051 return true;
42052 /* LRA checks if the hard register is OK for the given mode.
42053 QImode values can live in non-QI regs, so we allow all
42054 registers here. */
42055 if (lra_in_progress)
42056 return true;
42057 return !can_create_pseudo_p ();
42059 /* We handle both integers and floats in the general purpose registers. */
42060 else if (VALID_INT_MODE_P (mode))
42061 return true;
42062 else if (VALID_FP_MODE_P (mode))
42063 return true;
42064 else if (VALID_DFP_MODE_P (mode))
42065 return true;
42066 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
42067 on to use that value in smaller contexts, this can easily force a
42068 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
42069 supporting DImode, allow it. */
42070 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
42071 return true;
42073 return false;
42076 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
42077 tieable integer mode. */
42079 static bool
42080 ix86_tieable_integer_mode_p (machine_mode mode)
42082 switch (mode)
42084 case HImode:
42085 case SImode:
42086 return true;
42088 case QImode:
42089 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
42091 case DImode:
42092 return TARGET_64BIT;
42094 default:
42095 return false;
42099 /* Return true if MODE1 is accessible in a register that can hold MODE2
42100 without copying. That is, all register classes that can hold MODE2
42101 can also hold MODE1. */
42103 bool
42104 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
42106 if (mode1 == mode2)
42107 return true;
42109 if (ix86_tieable_integer_mode_p (mode1)
42110 && ix86_tieable_integer_mode_p (mode2))
42111 return true;
42113 /* MODE2 being XFmode implies fp stack or general regs, which means we
42114 can tie any smaller floating point modes to it. Note that we do not
42115 tie this with TFmode. */
42116 if (mode2 == XFmode)
42117 return mode1 == SFmode || mode1 == DFmode;
42119 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
42120 that we can tie it with SFmode. */
42121 if (mode2 == DFmode)
42122 return mode1 == SFmode;
42124 /* If MODE2 is only appropriate for an SSE register, then tie with
42125 any other mode acceptable to SSE registers. */
42126 if (GET_MODE_SIZE (mode2) == 32
42127 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42128 return (GET_MODE_SIZE (mode1) == 32
42129 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42130 if (GET_MODE_SIZE (mode2) == 16
42131 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42132 return (GET_MODE_SIZE (mode1) == 16
42133 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42135 /* If MODE2 is appropriate for an MMX register, then tie
42136 with any other mode acceptable to MMX registers. */
42137 if (GET_MODE_SIZE (mode2) == 8
42138 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
42139 return (GET_MODE_SIZE (mode1) == 8
42140 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
42142 return false;
42145 /* Return the cost of moving between two registers of mode MODE. */
42147 static int
42148 ix86_set_reg_reg_cost (machine_mode mode)
42150 unsigned int units = UNITS_PER_WORD;
42152 switch (GET_MODE_CLASS (mode))
42154 default:
42155 break;
42157 case MODE_CC:
42158 units = GET_MODE_SIZE (CCmode);
42159 break;
42161 case MODE_FLOAT:
42162 if ((TARGET_SSE && mode == TFmode)
42163 || (TARGET_80387 && mode == XFmode)
42164 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
42165 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
42166 units = GET_MODE_SIZE (mode);
42167 break;
42169 case MODE_COMPLEX_FLOAT:
42170 if ((TARGET_SSE && mode == TCmode)
42171 || (TARGET_80387 && mode == XCmode)
42172 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42173 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42174 units = GET_MODE_SIZE (mode);
42175 break;
42177 case MODE_VECTOR_INT:
42178 case MODE_VECTOR_FLOAT:
42179 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42180 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42181 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42182 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42183 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42184 units = GET_MODE_SIZE (mode);
42187 /* Return the cost of moving between two registers of mode MODE,
42188 assuming that the move will be in pieces of at most UNITS bytes. */
42189 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
42192 /* Compute a (partial) cost for rtx X. Return true if the complete
42193 cost has been computed, and false if subexpressions should be
42194 scanned. In either case, *TOTAL contains the cost result. */
42196 static bool
42197 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
42198 int *total, bool speed)
42200 rtx mask;
42201 enum rtx_code code = GET_CODE (x);
42202 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42203 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42205 switch (code)
42207 case SET:
42208 if (register_operand (SET_DEST (x), VOIDmode)
42209 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42211 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42212 return true;
42214 return false;
42216 case CONST_INT:
42217 case CONST:
42218 case LABEL_REF:
42219 case SYMBOL_REF:
42220 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42221 *total = 3;
42222 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42223 *total = 2;
42224 else if (flag_pic && SYMBOLIC_CONST (x)
42225 && !(TARGET_64BIT
42226 && (GET_CODE (x) == LABEL_REF
42227 || (GET_CODE (x) == SYMBOL_REF
42228 && SYMBOL_REF_LOCAL_P (x))))
42229 /* Use 0 cost for CONST to improve its propagation. */
42230 && (TARGET_64BIT || GET_CODE (x) != CONST))
42231 *total = 1;
42232 else
42233 *total = 0;
42234 return true;
42236 case CONST_WIDE_INT:
42237 *total = 0;
42238 return true;
42240 case CONST_DOUBLE:
42241 switch (standard_80387_constant_p (x))
42243 case 1: /* 0.0 */
42244 *total = 1;
42245 return true;
42246 default: /* Other constants */
42247 *total = 2;
42248 return true;
42249 case 0:
42250 case -1:
42251 break;
42253 if (SSE_FLOAT_MODE_P (mode))
42255 case CONST_VECTOR:
42256 switch (standard_sse_constant_p (x))
42258 case 0:
42259 break;
42260 case 1: /* 0: xor eliminates false dependency */
42261 *total = 0;
42262 return true;
42263 default: /* -1: cmp contains false dependency */
42264 *total = 1;
42265 return true;
42268 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42269 it'll probably end up. Add a penalty for size. */
42270 *total = (COSTS_N_INSNS (1)
42271 + (flag_pic != 0 && !TARGET_64BIT)
42272 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42273 return true;
42275 case ZERO_EXTEND:
42276 /* The zero extension is often completely free on x86_64, so make
42277 it as cheap as possible. */
42278 if (TARGET_64BIT && mode == DImode
42279 && GET_MODE (XEXP (x, 0)) == SImode)
42280 *total = 1;
42281 else if (TARGET_ZERO_EXTEND_WITH_AND)
42282 *total = cost->add;
42283 else
42284 *total = cost->movzx;
42285 return false;
42287 case SIGN_EXTEND:
42288 *total = cost->movsx;
42289 return false;
42291 case ASHIFT:
42292 if (SCALAR_INT_MODE_P (mode)
42293 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42294 && CONST_INT_P (XEXP (x, 1)))
42296 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42297 if (value == 1)
42299 *total = cost->add;
42300 return false;
42302 if ((value == 2 || value == 3)
42303 && cost->lea <= cost->shift_const)
42305 *total = cost->lea;
42306 return false;
42309 /* FALLTHRU */
42311 case ROTATE:
42312 case ASHIFTRT:
42313 case LSHIFTRT:
42314 case ROTATERT:
42315 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42317 /* ??? Should be SSE vector operation cost. */
42318 /* At least for published AMD latencies, this really is the same
42319 as the latency for a simple fpu operation like fabs. */
42320 /* V*QImode is emulated with 1-11 insns. */
42321 if (mode == V16QImode || mode == V32QImode)
42323 int count = 11;
42324 if (TARGET_XOP && mode == V16QImode)
42326 /* For XOP we use vpshab, which requires a broadcast of the
42327 value to the variable shift insn. For constants this
42328 means a V16QI constant in memory; even when we can perform the
42329 shift with one insn, set the cost so as to prefer paddb. */
42330 if (CONSTANT_P (XEXP (x, 1)))
42332 *total = (cost->fabs
42333 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
42334 + (speed ? 2 : COSTS_N_BYTES (16)));
42335 return true;
42337 count = 3;
42339 else if (TARGET_SSSE3)
42340 count = 7;
42341 *total = cost->fabs * count;
42343 else
42344 *total = cost->fabs;
42346 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42348 if (CONST_INT_P (XEXP (x, 1)))
42350 if (INTVAL (XEXP (x, 1)) > 32)
42351 *total = cost->shift_const + COSTS_N_INSNS (2);
42352 else
42353 *total = cost->shift_const * 2;
42355 else
42357 if (GET_CODE (XEXP (x, 1)) == AND)
42358 *total = cost->shift_var * 2;
42359 else
42360 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42363 else
42365 if (CONST_INT_P (XEXP (x, 1)))
42366 *total = cost->shift_const;
42367 else if (SUBREG_P (XEXP (x, 1))
42368 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42370 /* Return the cost after shift-and truncation. */
42371 *total = cost->shift_var;
42372 return true;
42374 else
42375 *total = cost->shift_var;
42377 return false;
42379 case FMA:
42381 rtx sub;
42383 gcc_assert (FLOAT_MODE_P (mode));
42384 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42386 /* ??? SSE scalar/vector cost should be used here. */
42387 /* ??? Bald assumption that fma has the same cost as fmul. */
42388 *total = cost->fmul;
42389 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
42391 /* A negation in op0 or op2 is free: FMS, FNMA, FNMS. */
42392 sub = XEXP (x, 0);
42393 if (GET_CODE (sub) == NEG)
42394 sub = XEXP (sub, 0);
42395 *total += rtx_cost (sub, mode, FMA, 0, speed);
42397 sub = XEXP (x, 2);
42398 if (GET_CODE (sub) == NEG)
42399 sub = XEXP (sub, 0);
42400 *total += rtx_cost (sub, mode, FMA, 2, speed);
42401 return true;
42404 case MULT:
42405 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42407 /* ??? SSE scalar cost should be used here. */
42408 *total = cost->fmul;
42409 return false;
42411 else if (X87_FLOAT_MODE_P (mode))
42413 *total = cost->fmul;
42414 return false;
42416 else if (FLOAT_MODE_P (mode))
42418 /* ??? SSE vector cost should be used here. */
42419 *total = cost->fmul;
42420 return false;
42422 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42424 /* V*QImode is emulated with 7-13 insns. */
42425 if (mode == V16QImode || mode == V32QImode)
42427 int extra = 11;
42428 if (TARGET_XOP && mode == V16QImode)
42429 extra = 5;
42430 else if (TARGET_SSSE3)
42431 extra = 6;
42432 *total = cost->fmul * 2 + cost->fabs * extra;
42434 /* V*DImode is emulated with 5-8 insns. */
42435 else if (mode == V2DImode || mode == V4DImode)
42437 if (TARGET_XOP && mode == V2DImode)
42438 *total = cost->fmul * 2 + cost->fabs * 3;
42439 else
42440 *total = cost->fmul * 3 + cost->fabs * 5;
42442 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42443 insns, including two PMULUDQ. */
42444 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42445 *total = cost->fmul * 2 + cost->fabs * 5;
42446 else
42447 *total = cost->fmul;
42448 return false;
42450 else
42452 rtx op0 = XEXP (x, 0);
42453 rtx op1 = XEXP (x, 1);
42454 int nbits;
42455 if (CONST_INT_P (XEXP (x, 1)))
42457 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42458 for (nbits = 0; value != 0; value &= value - 1)
42459 nbits++;
42461 else
42462 /* This is arbitrary. */
42463 nbits = 7;
42465 /* Compute costs correctly for widening multiplication. */
42466 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42467 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42468 == GET_MODE_SIZE (mode))
42470 int is_mulwiden = 0;
42471 machine_mode inner_mode = GET_MODE (op0);
42473 if (GET_CODE (op0) == GET_CODE (op1))
42474 is_mulwiden = 1, op1 = XEXP (op1, 0);
42475 else if (CONST_INT_P (op1))
42477 if (GET_CODE (op0) == SIGN_EXTEND)
42478 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42479 == INTVAL (op1);
42480 else
42481 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42484 if (is_mulwiden)
42485 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42488 *total = (cost->mult_init[MODE_INDEX (mode)]
42489 + nbits * cost->mult_bit
42490 + rtx_cost (op0, mode, outer_code, opno, speed)
42491 + rtx_cost (op1, mode, outer_code, opno, speed));
42493 return true;
42496 case DIV:
42497 case UDIV:
42498 case MOD:
42499 case UMOD:
42500 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42501 /* ??? SSE cost should be used here. */
42502 *total = cost->fdiv;
42503 else if (X87_FLOAT_MODE_P (mode))
42504 *total = cost->fdiv;
42505 else if (FLOAT_MODE_P (mode))
42506 /* ??? SSE vector cost should be used here. */
42507 *total = cost->fdiv;
42508 else
42509 *total = cost->divide[MODE_INDEX (mode)];
42510 return false;
42512 case PLUS:
42513 if (GET_MODE_CLASS (mode) == MODE_INT
42514 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42516 if (GET_CODE (XEXP (x, 0)) == PLUS
42517 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42518 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42519 && CONSTANT_P (XEXP (x, 1)))
42521 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42522 if (val == 2 || val == 4 || val == 8)
42524 *total = cost->lea;
42525 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
42526 outer_code, opno, speed);
42527 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
42528 outer_code, opno, speed);
42529 *total += rtx_cost (XEXP (x, 1), mode,
42530 outer_code, opno, speed);
42531 return true;
42534 else if (GET_CODE (XEXP (x, 0)) == MULT
42535 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42537 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42538 if (val == 2 || val == 4 || val == 8)
42540 *total = cost->lea;
42541 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
42542 outer_code, opno, speed);
42543 *total += rtx_cost (XEXP (x, 1), mode,
42544 outer_code, opno, speed);
42545 return true;
42548 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42550 *total = cost->lea;
42551 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
42552 outer_code, opno, speed);
42553 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
42554 outer_code, opno, speed);
42555 *total += rtx_cost (XEXP (x, 1), mode,
42556 outer_code, opno, speed);
42557 return true;
42560 /* FALLTHRU */
42562 case MINUS:
42563 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42565 /* ??? SSE cost should be used here. */
42566 *total = cost->fadd;
42567 return false;
42569 else if (X87_FLOAT_MODE_P (mode))
42571 *total = cost->fadd;
42572 return false;
42574 else if (FLOAT_MODE_P (mode))
42576 /* ??? SSE vector cost should be used here. */
42577 *total = cost->fadd;
42578 return false;
42580 /* FALLTHRU */
42582 case AND:
42583 case IOR:
42584 case XOR:
42585 if (GET_MODE_CLASS (mode) == MODE_INT
42586 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42588 *total = (cost->add * 2
42589 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
42590 << (GET_MODE (XEXP (x, 0)) != DImode))
42591 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
42592 << (GET_MODE (XEXP (x, 1)) != DImode)));
42593 return true;
42595 /* FALLTHRU */
42597 case NEG:
42598 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42600 /* ??? SSE cost should be used here. */
42601 *total = cost->fchs;
42602 return false;
42604 else if (X87_FLOAT_MODE_P (mode))
42606 *total = cost->fchs;
42607 return false;
42609 else if (FLOAT_MODE_P (mode))
42611 /* ??? SSE vector cost should be used here. */
42612 *total = cost->fchs;
42613 return false;
42615 /* FALLTHRU */
42617 case NOT:
42618 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42620 /* ??? Should be SSE vector operation cost. */
42621 /* At least for published AMD latencies, this really is the same
42622 as the latency for a simple fpu operation like fabs. */
42623 *total = cost->fabs;
42625 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42626 *total = cost->add * 2;
42627 else
42628 *total = cost->add;
42629 return false;
42631 case COMPARE:
42632 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42633 && XEXP (XEXP (x, 0), 1) == const1_rtx
42634 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42635 && XEXP (x, 1) == const0_rtx)
42637 /* This kind of construct is implemented using test[bwl].
42638 Treat it as if we had an AND. */
42639 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
42640 *total = (cost->add
42641 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
42642 opno, speed)
42643 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
42644 return true;
42647 /* The embedded comparison operand is completely free. */
42648 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
42649 && XEXP (x, 1) == const0_rtx)
42650 *total = 0;
42652 return false;
42654 case FLOAT_EXTEND:
42655 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42656 *total = 0;
42657 return false;
42659 case ABS:
42660 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42661 /* ??? SSE cost should be used here. */
42662 *total = cost->fabs;
42663 else if (X87_FLOAT_MODE_P (mode))
42664 *total = cost->fabs;
42665 else if (FLOAT_MODE_P (mode))
42666 /* ??? SSE vector cost should be used here. */
42667 *total = cost->fabs;
42668 return false;
42670 case SQRT:
42671 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42672 /* ??? SSE cost should be used here. */
42673 *total = cost->fsqrt;
42674 else if (X87_FLOAT_MODE_P (mode))
42675 *total = cost->fsqrt;
42676 else if (FLOAT_MODE_P (mode))
42677 /* ??? SSE vector cost should be used here. */
42678 *total = cost->fsqrt;
42679 return false;
42681 case UNSPEC:
42682 if (XINT (x, 1) == UNSPEC_TP)
42683 *total = 0;
42684 return false;
42686 case VEC_SELECT:
42687 case VEC_CONCAT:
42688 case VEC_DUPLICATE:
42689 /* ??? Assume all of these vector manipulation patterns are
42690 recognizable, in which case they all pretty much have the
42691 same cost. */
42692 *total = cost->fabs;
42693 return true;
42694 case VEC_MERGE:
42695 mask = XEXP (x, 2);
42696 /* This is a masked instruction; assume the same cost as
42697 the non-masked variant. */
42698 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42699 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
42700 else
42701 *total = cost->fabs;
42702 return true;
42704 default:
42705 return false;
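/* Illustrative sketch, not part of the original i386.c: two C fragments
   whose RTL is priced by ix86_rtx_costs above.  The first maps onto the
   (plus (plus (mult ...) ...) ...) shape charged as a single LEA; the
   second branches on one constant bit, which the COMPARE with
   ZERO_EXTRACT case treats like a test[bwl].  All names here are
   hypothetical and exist only for the example.  */

extern void example_handle_flag (void);

static int *
example_lea_address (int *base, int i)
{
  /* &base[i + 2] == base + i*4 + 8 can be formed by one lea, so the
     PLUS case above charges cost->lea rather than two additions.  */
  return &base[i + 2];
}

static void
example_bit_branch (unsigned int x)
{
  /* Branching on one constant bit is typically done with test[bwl].  */
  if (x & (1u << 5))
    example_handle_flag ();
}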
42709 #if TARGET_MACHO
42711 static int current_machopic_label_num;
42713 /* Given a symbol name and its associated stub, write out the
42714 definition of the stub. */
42716 void
42717 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42719 unsigned int length;
42720 char *binder_name, *symbol_name, lazy_ptr_name[32];
42721 int label = ++current_machopic_label_num;
42723 /* For 64-bit we shouldn't get here. */
42724 gcc_assert (!TARGET_64BIT);
42726 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42727 symb = targetm.strip_name_encoding (symb);
42729 length = strlen (stub);
42730 binder_name = XALLOCAVEC (char, length + 32);
42731 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42733 length = strlen (symb);
42734 symbol_name = XALLOCAVEC (char, length + 32);
42735 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42737 sprintf (lazy_ptr_name, "L%d$lz", label);
42739 if (MACHOPIC_ATT_STUB)
42740 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42741 else if (MACHOPIC_PURE)
42742 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42743 else
42744 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42746 fprintf (file, "%s:\n", stub);
42747 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42749 if (MACHOPIC_ATT_STUB)
42751 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42753 else if (MACHOPIC_PURE)
42755 /* PIC stub. */
42756 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42757 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42758 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42759 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42760 label, lazy_ptr_name, label);
42761 fprintf (file, "\tjmp\t*%%ecx\n");
42763 else
42764 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42766 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42767 it needs no stub-binding-helper. */
42768 if (MACHOPIC_ATT_STUB)
42769 return;
42771 fprintf (file, "%s:\n", binder_name);
42773 if (MACHOPIC_PURE)
42775 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42776 fprintf (file, "\tpushl\t%%ecx\n");
42778 else
42779 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42781 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42783 /* N.B. Keep the correspondence of these
42784 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42785 old-pic/new-pic/non-pic stubs; altering this will break
42786 compatibility with existing dylibs. */
42787 if (MACHOPIC_PURE)
42789 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42790 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42792 else
42793 /* 16-byte -mdynamic-no-pic stub. */
42794 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42796 fprintf (file, "%s:\n", lazy_ptr_name);
42797 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42798 fprintf (file, ASM_LONG "%s\n", binder_name);
42800 #endif /* TARGET_MACHO */
42802 /* Order the registers for the register allocator. */
42804 void
42805 x86_order_regs_for_local_alloc (void)
42807 int pos = 0;
42808 int i;
42810 /* First allocate the local general purpose registers. */
42811 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42812 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42813 reg_alloc_order [pos++] = i;
42815 /* Global general purpose registers. */
42816 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42817 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42818 reg_alloc_order [pos++] = i;
42820 /* x87 registers come first in case we are doing FP math
42821 using them. */
42822 if (!TARGET_SSE_MATH)
42823 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42824 reg_alloc_order [pos++] = i;
42826 /* SSE registers. */
42827 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42828 reg_alloc_order [pos++] = i;
42829 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42830 reg_alloc_order [pos++] = i;
42832 /* Extended REX SSE registers. */
42833 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42834 reg_alloc_order [pos++] = i;
42836 /* Mask registers. */
42837 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42838 reg_alloc_order [pos++] = i;
42840 /* MPX bound registers. */
42841 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42842 reg_alloc_order [pos++] = i;
42844 /* x87 registers. */
42845 if (TARGET_SSE_MATH)
42846 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42847 reg_alloc_order [pos++] = i;
42849 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42850 reg_alloc_order [pos++] = i;
42852 /* Initialize the rest of the array, as we do not allocate some registers
42853 at all. */
42854 while (pos < FIRST_PSEUDO_REGISTER)
42855 reg_alloc_order [pos++] = 0;
42858 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42859 in struct attribute_spec handler. */
42860 static tree
42861 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42862 tree args,
42863 int,
42864 bool *no_add_attrs)
42866 if (TREE_CODE (*node) != FUNCTION_TYPE
42867 && TREE_CODE (*node) != METHOD_TYPE
42868 && TREE_CODE (*node) != FIELD_DECL
42869 && TREE_CODE (*node) != TYPE_DECL)
42871 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42872 name);
42873 *no_add_attrs = true;
42874 return NULL_TREE;
42876 if (TARGET_64BIT)
42878 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42879 name);
42880 *no_add_attrs = true;
42881 return NULL_TREE;
42883 if (is_attribute_p ("callee_pop_aggregate_return", name))
42885 tree cst;
42887 cst = TREE_VALUE (args);
42888 if (TREE_CODE (cst) != INTEGER_CST)
42890 warning (OPT_Wattributes,
42891 "%qE attribute requires an integer constant argument",
42892 name);
42893 *no_add_attrs = true;
42895 else if (compare_tree_int (cst, 0) != 0
42896 && compare_tree_int (cst, 1) != 0)
42898 warning (OPT_Wattributes,
42899 "argument to %qE attribute is neither zero, nor one",
42900 name);
42901 *no_add_attrs = true;
42904 return NULL_TREE;
42907 return NULL_TREE;
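/* Illustrative usage, not part of the original i386.c: the attribute
   validated above is only meaningful on 32-bit x86 and takes 0 or 1 to
   say whether the callee pops the hidden aggregate-return pointer.  The
   declarations below are hypothetical.  */

struct example_pair { int lo, hi; };

extern struct example_pair
__attribute__ ((callee_pop_aggregate_return (1)))
example_make_pair (int lo, int hi);	/* Callee pops the hidden pointer.  */

extern struct example_pair
__attribute__ ((callee_pop_aggregate_return (0)))
example_copy_pair (struct example_pair p);  /* Callee does not pop it.  */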
42910 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
42911 struct attribute_spec.handler. */
42912 static tree
42913 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42914 bool *no_add_attrs)
42916 if (TREE_CODE (*node) != FUNCTION_TYPE
42917 && TREE_CODE (*node) != METHOD_TYPE
42918 && TREE_CODE (*node) != FIELD_DECL
42919 && TREE_CODE (*node) != TYPE_DECL)
42921 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42922 name);
42923 *no_add_attrs = true;
42924 return NULL_TREE;
42927 /* Can combine regparm with all attributes but fastcall. */
42928 if (is_attribute_p ("ms_abi", name))
42930 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42932 error ("ms_abi and sysv_abi attributes are not compatible");
42935 return NULL_TREE;
42937 else if (is_attribute_p ("sysv_abi", name))
42939 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42941 error ("ms_abi and sysv_abi attributes are not compatible");
42944 return NULL_TREE;
42947 return NULL_TREE;
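/* Illustrative usage, not part of the original i386.c: ms_abi and
   sysv_abi let individual x86-64 functions opt into the other calling
   convention; the handler above only rejects combining both on one
   declaration.  The names below are hypothetical.  */

extern void __attribute__ ((ms_abi)) example_win64_callback (void *ctx);
extern void __attribute__ ((sysv_abi)) example_sysv_helper (void *ctx);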
42950 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42951 struct attribute_spec.handler. */
42952 static tree
42953 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42954 bool *no_add_attrs)
42956 tree *type = NULL;
42957 if (DECL_P (*node))
42959 if (TREE_CODE (*node) == TYPE_DECL)
42960 type = &TREE_TYPE (*node);
42962 else
42963 type = node;
42965 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
42967 warning (OPT_Wattributes, "%qE attribute ignored",
42968 name);
42969 *no_add_attrs = true;
42972 else if ((is_attribute_p ("ms_struct", name)
42973 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
42974 || ((is_attribute_p ("gcc_struct", name)
42975 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
42977 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
42978 name);
42979 *no_add_attrs = true;
42982 return NULL_TREE;
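/* Illustrative usage, not part of the original i386.c: ms_struct and
   gcc_struct select the record layout convention checked above and used
   by ix86_ms_bitfield_layout_p below; applying both to one type is
   rejected.  The type below is hypothetical.  */

struct __attribute__ ((ms_struct)) example_ms_flags
{
  char kind : 3;
  int value : 5;	/* ms_struct starts a new unit here instead of
			   packing into the preceding char.  */
};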
42985 static tree
42986 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
42987 bool *no_add_attrs)
42989 if (TREE_CODE (*node) != FUNCTION_DECL)
42991 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42992 name);
42993 *no_add_attrs = true;
42995 return NULL_TREE;
42998 static bool
42999 ix86_ms_bitfield_layout_p (const_tree record_type)
43001 return ((TARGET_MS_BITFIELD_LAYOUT
43002 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
43003 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
43006 /* Returns an expression indicating where the this parameter is
43007 located on entry to the FUNCTION. */
43009 static rtx
43010 x86_this_parameter (tree function)
43012 tree type = TREE_TYPE (function);
43013 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
43014 int nregs;
43016 if (TARGET_64BIT)
43018 const int *parm_regs;
43020 if (ix86_function_type_abi (type) == MS_ABI)
43021 parm_regs = x86_64_ms_abi_int_parameter_registers;
43022 else
43023 parm_regs = x86_64_int_parameter_registers;
43024 return gen_rtx_REG (Pmode, parm_regs[aggr]);
43027 nregs = ix86_function_regparm (type, function);
43029 if (nregs > 0 && !stdarg_p (type))
43031 int regno;
43032 unsigned int ccvt = ix86_get_callcvt (type);
43034 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43035 regno = aggr ? DX_REG : CX_REG;
43036 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43038 regno = CX_REG;
43039 if (aggr)
43040 return gen_rtx_MEM (SImode,
43041 plus_constant (Pmode, stack_pointer_rtx, 4));
43043 else
43045 regno = AX_REG;
43046 if (aggr)
43048 regno = DX_REG;
43049 if (nregs == 1)
43050 return gen_rtx_MEM (SImode,
43051 plus_constant (Pmode,
43052 stack_pointer_rtx, 4));
43055 return gen_rtx_REG (SImode, regno);
43058 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
43059 aggr ? 8 : 4));
43062 /* Determine whether x86_output_mi_thunk can succeed. */
43064 static bool
43065 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
43066 const_tree function)
43068 /* 64-bit can handle anything. */
43069 if (TARGET_64BIT)
43070 return true;
43072 /* For 32-bit, everything's fine if we have one free register. */
43073 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
43074 return true;
43076 /* Need a free register for vcall_offset. */
43077 if (vcall_offset)
43078 return false;
43080 /* Need a free register for GOT references. */
43081 if (flag_pic && !targetm.binds_local_p (function))
43082 return false;
43084 /* Otherwise ok. */
43085 return true;
43088 /* Output the assembler code for a thunk function. THUNK_DECL is the
43089 declaration for the thunk function itself, FUNCTION is the decl for
43090 the target function. DELTA is an immediate constant offset to be
43091 added to THIS. If VCALL_OFFSET is nonzero, the word at
43092 *(*this + vcall_offset) should be added to THIS. */
43094 static void
43095 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
43096 HOST_WIDE_INT vcall_offset, tree function)
43098 rtx this_param = x86_this_parameter (function);
43099 rtx this_reg, tmp, fnaddr;
43100 unsigned int tmp_regno;
43101 rtx_insn *insn;
43103 if (TARGET_64BIT)
43104 tmp_regno = R10_REG;
43105 else
43107 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
43108 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43109 tmp_regno = AX_REG;
43110 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43111 tmp_regno = DX_REG;
43112 else
43113 tmp_regno = CX_REG;
43116 emit_note (NOTE_INSN_PROLOGUE_END);
43118 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
43119 pull it in now and let DELTA benefit. */
43120 if (REG_P (this_param))
43121 this_reg = this_param;
43122 else if (vcall_offset)
43124 /* Put the this parameter into %eax. */
43125 this_reg = gen_rtx_REG (Pmode, AX_REG);
43126 emit_move_insn (this_reg, this_param);
43128 else
43129 this_reg = NULL_RTX;
43131 /* Adjust the this parameter by a fixed constant. */
43132 if (delta)
43134 rtx delta_rtx = GEN_INT (delta);
43135 rtx delta_dst = this_reg ? this_reg : this_param;
43137 if (TARGET_64BIT)
43139 if (!x86_64_general_operand (delta_rtx, Pmode))
43141 tmp = gen_rtx_REG (Pmode, tmp_regno);
43142 emit_move_insn (tmp, delta_rtx);
43143 delta_rtx = tmp;
43147 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
43150 /* Adjust the this parameter by a value stored in the vtable. */
43151 if (vcall_offset)
43153 rtx vcall_addr, vcall_mem, this_mem;
43155 tmp = gen_rtx_REG (Pmode, tmp_regno);
43157 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
43158 if (Pmode != ptr_mode)
43159 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
43160 emit_move_insn (tmp, this_mem);
43162 /* Adjust the this parameter. */
43163 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
43164 if (TARGET_64BIT
43165 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
43167 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
43168 emit_move_insn (tmp2, GEN_INT (vcall_offset));
43169 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
43172 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
43173 if (Pmode != ptr_mode)
43174 emit_insn (gen_addsi_1_zext (this_reg,
43175 gen_rtx_REG (ptr_mode,
43176 REGNO (this_reg)),
43177 vcall_mem));
43178 else
43179 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
43182 /* If necessary, drop THIS back to its stack slot. */
43183 if (this_reg && this_reg != this_param)
43184 emit_move_insn (this_param, this_reg);
43186 fnaddr = XEXP (DECL_RTL (function), 0);
43187 if (TARGET_64BIT)
43189 if (!flag_pic || targetm.binds_local_p (function)
43190 || TARGET_PECOFF)
43192 else
43194 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43195 tmp = gen_rtx_CONST (Pmode, tmp);
43196 fnaddr = gen_const_mem (Pmode, tmp);
43199 else
43201 if (!flag_pic || targetm.binds_local_p (function))
43203 #if TARGET_MACHO
43204 else if (TARGET_MACHO)
43206 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43207 fnaddr = XEXP (fnaddr, 0);
43209 #endif /* TARGET_MACHO */
43210 else
43212 tmp = gen_rtx_REG (Pmode, CX_REG);
43213 output_set_got (tmp, NULL_RTX);
43215 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43216 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43217 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43218 fnaddr = gen_const_mem (Pmode, fnaddr);
43222 /* Our sibling call patterns do not allow memories, because we have no
43223 predicate that can distinguish between frame and non-frame memory.
43224 For our purposes here, we can get away with (ab)using a jump pattern,
43225 because we're going to do no optimization. */
43226 if (MEM_P (fnaddr))
43228 if (sibcall_insn_operand (fnaddr, word_mode))
43230 fnaddr = XEXP (DECL_RTL (function), 0);
43231 tmp = gen_rtx_MEM (QImode, fnaddr);
43232 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43233 tmp = emit_call_insn (tmp);
43234 SIBLING_CALL_P (tmp) = 1;
43236 else
43237 emit_jump_insn (gen_indirect_jump (fnaddr));
43239 else
43241 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43243 // CM_LARGE_PIC always uses pseudo PIC register which is
43244 // uninitialized. Since FUNCTION is local and calling it
43245 // doesn't go through PLT, we use scratch register %r11 as
43246 // PIC register and initialize it here.
43247 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43248 ix86_init_large_pic_reg (tmp_regno);
43249 fnaddr = legitimize_pic_address (fnaddr,
43250 gen_rtx_REG (Pmode, tmp_regno));
43253 if (!sibcall_insn_operand (fnaddr, word_mode))
43255 tmp = gen_rtx_REG (word_mode, tmp_regno);
43256 if (GET_MODE (fnaddr) != word_mode)
43257 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43258 emit_move_insn (tmp, fnaddr);
43259 fnaddr = tmp;
43262 tmp = gen_rtx_MEM (QImode, fnaddr);
43263 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43264 tmp = emit_call_insn (tmp);
43265 SIBLING_CALL_P (tmp) = 1;
43267 emit_barrier ();
43269 /* Emit just enough of rest_of_compilation to get the insns emitted.
43270 Note that use_thunk calls assemble_start_function et al. */
43271 insn = get_insns ();
43272 shorten_branches (insn);
43273 final_start_function (insn, file, 1);
43274 final (insn, file, 1);
43275 final_end_function ();
43278 static void
43279 x86_file_start (void)
43281 default_file_start ();
43282 if (TARGET_16BIT)
43283 fputs ("\t.code16gcc\n", asm_out_file);
43284 #if TARGET_MACHO
43285 darwin_file_start ();
43286 #endif
43287 if (X86_FILE_START_VERSION_DIRECTIVE)
43288 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43289 if (X86_FILE_START_FLTUSED)
43290 fputs ("\t.global\t__fltused\n", asm_out_file);
43291 if (ix86_asm_dialect == ASM_INTEL)
43292 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43296 x86_field_alignment (tree field, int computed)
43298 machine_mode mode;
43299 tree type = TREE_TYPE (field);
43301 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43302 return computed;
43303 if (TARGET_IAMCU)
43304 return iamcu_alignment (type, computed);
43305 mode = TYPE_MODE (strip_array_types (type));
43306 if (mode == DFmode || mode == DCmode
43307 || GET_MODE_CLASS (mode) == MODE_INT
43308 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43309 return MIN (32, computed);
43310 return computed;
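/* Illustrative consequence, not part of the original i386.c: with the
   32-bit cap above (and without -malign-double), a double field is
   limited to 4-byte alignment inside a struct, so on a typical 32-bit
   x86 target this hypothetical struct occupies 12 bytes rather than 16.  */

struct example_int_then_double
{
  int i;	/* offset 0 */
  double d;	/* offset 4 on 32-bit x86, not 8 */
};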
43313 /* Print call to TARGET to FILE. */
43315 static void
43316 x86_print_call_or_nop (FILE *file, const char *target)
43318 if (flag_nop_mcount)
43319 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43320 else
43321 fprintf (file, "1:\tcall\t%s\n", target);
43324 /* Output assembler code to FILE to increment profiler label # LABELNO
43325 for profiling a function entry. */
43326 void
43327 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43329 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43330 : MCOUNT_NAME);
43331 if (TARGET_64BIT)
43333 #ifndef NO_PROFILE_COUNTERS
43334 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43335 #endif
43337 if (!TARGET_PECOFF && flag_pic)
43338 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43339 else
43340 x86_print_call_or_nop (file, mcount_name);
43342 else if (flag_pic)
43344 #ifndef NO_PROFILE_COUNTERS
43345 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43346 LPREFIX, labelno);
43347 #endif
43348 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43350 else
43352 #ifndef NO_PROFILE_COUNTERS
43353 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43354 LPREFIX, labelno);
43355 #endif
43356 x86_print_call_or_nop (file, mcount_name);
43359 if (flag_record_mcount)
43361 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43362 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43363 fprintf (file, "\t.previous\n");
43367 /* We don't have exact information about the insn sizes, but we may quite
43368 safely assume that we are informed about all 1-byte insns and memory
43369 address sizes. This is enough to eliminate unnecessary padding in
43370 99% of cases. */
43372 static int
43373 min_insn_size (rtx_insn *insn)
43375 int l = 0, len;
43377 if (!INSN_P (insn) || !active_insn_p (insn))
43378 return 0;
43380 /* Discard alignments we've emitted, and jump instructions. */
43381 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43382 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43383 return 0;
43385 /* Important case: calls are always 5 bytes.
43386 It is common to have many calls in a row. */
43387 if (CALL_P (insn)
43388 && symbolic_reference_mentioned_p (PATTERN (insn))
43389 && !SIBLING_CALL_P (insn))
43390 return 5;
43391 len = get_attr_length (insn);
43392 if (len <= 1)
43393 return 1;
43395 /* For normal instructions we rely on get_attr_length being exact,
43396 with a few exceptions. */
43397 if (!JUMP_P (insn))
43399 enum attr_type type = get_attr_type (insn);
43401 switch (type)
43403 case TYPE_MULTI:
43404 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43405 || asm_noperands (PATTERN (insn)) >= 0)
43406 return 0;
43407 break;
43408 case TYPE_OTHER:
43409 case TYPE_FCMP:
43410 break;
43411 default:
43412 /* Otherwise trust get_attr_length. */
43413 return len;
43416 l = get_attr_length_address (insn);
43417 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43418 l = 4;
43420 if (l)
43421 return 1+l;
43422 else
43423 return 2;
43426 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43428 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
43429 16-byte window. */
43431 static void
43432 ix86_avoid_jump_mispredicts (void)
43434 rtx_insn *insn, *start = get_insns ();
43435 int nbytes = 0, njumps = 0;
43436 bool isjump = false;
43438 /* Look for all minimal intervals of instructions containing 4 jumps.
43439 The intervals are bounded by START and INSN. NBYTES is the total
43440 size of the instructions in the interval, including INSN but not
43441 START. When NBYTES is smaller than 16 bytes, it is possible
43442 that the ends of START and INSN land in the same 16-byte page.
43444 The smallest offset at which INSN can start in that page is when START
43445 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
43446 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
43448 Don't treat an asm goto as a jump: while it can contain a jump, it doesn't
43449 have to, control transfer to its label(s) can be performed through other
43450 means, and we also estimate the minimum length of all asm stmts as 0. */
43451 for (insn = start; insn; insn = NEXT_INSN (insn))
43453 int min_size;
43455 if (LABEL_P (insn))
43457 int align = label_to_alignment (insn);
43458 int max_skip = label_to_max_skip (insn);
43460 if (max_skip > 15)
43461 max_skip = 15;
43462 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43463 already in the current 16 byte page, because otherwise
43464 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43465 bytes to reach 16 byte boundary. */
43466 if (align <= 0
43467 || (align <= 3 && max_skip != (1 << align) - 1))
43468 max_skip = 0;
43469 if (dump_file)
43470 fprintf (dump_file, "Label %i with max_skip %i\n",
43471 INSN_UID (insn), max_skip);
43472 if (max_skip)
43474 while (nbytes + max_skip >= 16)
43476 start = NEXT_INSN (start);
43477 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43478 || CALL_P (start))
43479 njumps--, isjump = true;
43480 else
43481 isjump = false;
43482 nbytes -= min_insn_size (start);
43485 continue;
43488 min_size = min_insn_size (insn);
43489 nbytes += min_size;
43490 if (dump_file)
43491 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43492 INSN_UID (insn), min_size);
43493 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43494 || CALL_P (insn))
43495 njumps++;
43496 else
43497 continue;
43499 while (njumps > 3)
43501 start = NEXT_INSN (start);
43502 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43503 || CALL_P (start))
43504 njumps--, isjump = true;
43505 else
43506 isjump = false;
43507 nbytes -= min_insn_size (start);
43509 gcc_assert (njumps >= 0);
43510 if (dump_file)
43511 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43512 INSN_UID (start), INSN_UID (insn), nbytes);
43514 if (njumps == 3 && isjump && nbytes < 16)
43516 int padsize = 15 - nbytes + min_insn_size (insn);
43518 if (dump_file)
43519 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43520 INSN_UID (insn), padsize);
43521 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43525 #endif
43527 /* AMD Athlon works faster
43528 when RET is not the destination of a conditional jump or directly preceded
43529 by another jump instruction. We avoid the penalty by inserting a NOP just
43530 before the RET instruction in such cases. */
43531 static void
43532 ix86_pad_returns (void)
43534 edge e;
43535 edge_iterator ei;
43537 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43539 basic_block bb = e->src;
43540 rtx_insn *ret = BB_END (bb);
43541 rtx_insn *prev;
43542 bool replace = false;
43544 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43545 || optimize_bb_for_size_p (bb))
43546 continue;
43547 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43548 if (active_insn_p (prev) || LABEL_P (prev))
43549 break;
43550 if (prev && LABEL_P (prev))
43552 edge e;
43553 edge_iterator ei;
43555 FOR_EACH_EDGE (e, ei, bb->preds)
43556 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43557 && !(e->flags & EDGE_FALLTHRU))
43559 replace = true;
43560 break;
43563 if (!replace)
43565 prev = prev_active_insn (ret);
43566 if (prev
43567 && ((JUMP_P (prev) && any_condjump_p (prev))
43568 || CALL_P (prev)))
43569 replace = true;
43570 /* Empty functions get branch mispredict even when
43571 the jump destination is not visible to us. */
43572 if (!prev && !optimize_function_for_size_p (cfun))
43573 replace = true;
43575 if (replace)
43577 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43578 delete_insn (ret);
43583 /* Count the minimum number of instructions in BB. Return 4 if the
43584 number of instructions >= 4. */
43586 static int
43587 ix86_count_insn_bb (basic_block bb)
43589 rtx_insn *insn;
43590 int insn_count = 0;
43592 /* Count number of instructions in this block. Return 4 if the number
43593 of instructions >= 4. */
43594 FOR_BB_INSNS (bb, insn)
43596 /* Only happen in exit blocks. */
43597 if (JUMP_P (insn)
43598 && ANY_RETURN_P (PATTERN (insn)))
43599 break;
43601 if (NONDEBUG_INSN_P (insn)
43602 && GET_CODE (PATTERN (insn)) != USE
43603 && GET_CODE (PATTERN (insn)) != CLOBBER)
43605 insn_count++;
43606 if (insn_count >= 4)
43607 return insn_count;
43611 return insn_count;
43615 /* Count the minimum number of instructions in code path in BB.
43616 Return 4 if the number of instructions >= 4. */
43618 static int
43619 ix86_count_insn (basic_block bb)
43621 edge e;
43622 edge_iterator ei;
43623 int min_prev_count;
43625 /* Only bother counting instructions along paths with no
43626 more than 2 basic blocks between entry and exit. Given
43627 that BB has an edge to exit, determine if a predecessor
43628 of BB has an edge from entry. If so, compute the number
43629 of instructions in the predecessor block. If there
43630 happen to be multiple such blocks, compute the minimum. */
43631 min_prev_count = 4;
43632 FOR_EACH_EDGE (e, ei, bb->preds)
43634 edge prev_e;
43635 edge_iterator prev_ei;
43637 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43639 min_prev_count = 0;
43640 break;
43642 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43644 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43646 int count = ix86_count_insn_bb (e->src);
43647 if (count < min_prev_count)
43648 min_prev_count = count;
43649 break;
43654 if (min_prev_count < 4)
43655 min_prev_count += ix86_count_insn_bb (bb);
43657 return min_prev_count;
43660 /* Pad short function to 4 instructions. */
43662 static void
43663 ix86_pad_short_function (void)
43665 edge e;
43666 edge_iterator ei;
43668 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43670 rtx_insn *ret = BB_END (e->src);
43671 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43673 int insn_count = ix86_count_insn (e->src);
43675 /* Pad short function. */
43676 if (insn_count < 4)
43678 rtx_insn *insn = ret;
43680 /* Find epilogue. */
43681 while (insn
43682 && (!NOTE_P (insn)
43683 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43684 insn = PREV_INSN (insn);
43686 if (!insn)
43687 insn = ret;
43689 /* Two NOPs count as one instruction. */
43690 insn_count = 2 * (4 - insn_count);
43691 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43697 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43698 the epilogue, the Windows system unwinder will apply epilogue logic and
43699 produce incorrect offsets. This can be avoided by adding a nop between
43700 the last insn that can throw and the first insn of the epilogue. */
43702 static void
43703 ix86_seh_fixup_eh_fallthru (void)
43705 edge e;
43706 edge_iterator ei;
43708 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43710 rtx_insn *insn, *next;
43712 /* Find the beginning of the epilogue. */
43713 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43714 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43715 break;
43716 if (insn == NULL)
43717 continue;
43719 /* We only care about preceding insns that can throw. */
43720 insn = prev_active_insn (insn);
43721 if (insn == NULL || !can_throw_internal (insn))
43722 continue;
43724 /* Do not separate calls from their debug information. */
43725 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43726 if (NOTE_P (next)
43727 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43728 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43729 insn = next;
43730 else
43731 break;
43733 emit_insn_after (gen_nops (const1_rtx), insn);
43737 /* Implement machine specific optimizations. We implement padding of returns
43738 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
43739 static void
43740 ix86_reorg (void)
43742 /* We are freeing block_for_insn in the toplev to keep compatibility
43743 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43744 compute_bb_for_insn ();
43746 if (TARGET_SEH && current_function_has_exception_handlers ())
43747 ix86_seh_fixup_eh_fallthru ();
43749 if (optimize && optimize_function_for_speed_p (cfun))
43751 if (TARGET_PAD_SHORT_FUNCTION)
43752 ix86_pad_short_function ();
43753 else if (TARGET_PAD_RETURNS)
43754 ix86_pad_returns ();
43755 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43756 if (TARGET_FOUR_JUMP_LIMIT)
43757 ix86_avoid_jump_mispredicts ();
43758 #endif
43762 /* Return nonzero when a QImode register that must be represented via a REX
43763 prefix is used. */
43764 bool
43765 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43767 int i;
43768 extract_insn_cached (insn);
43769 for (i = 0; i < recog_data.n_operands; i++)
43770 if (GENERAL_REG_P (recog_data.operand[i])
43771 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43772 return true;
43773 return false;
43776 /* Return true when INSN mentions a register that must be encoded using a REX
43777 prefix. */
43778 bool
43779 x86_extended_reg_mentioned_p (rtx insn)
43781 subrtx_iterator::array_type array;
43782 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43784 const_rtx x = *iter;
43785 if (REG_P (x)
43786 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43787 return true;
43789 return false;
43792 /* If profitable, negate (without causing overflow) the integer constant
43793 of mode MODE at location LOC. Return true in this case. */
43794 bool
43795 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43797 HOST_WIDE_INT val;
43799 if (!CONST_INT_P (*loc))
43800 return false;
43802 switch (mode)
43804 case DImode:
43805 /* DImode x86_64 constants must fit in 32 bits. */
43806 gcc_assert (x86_64_immediate_operand (*loc, mode));
43808 mode = SImode;
43809 break;
43811 case SImode:
43812 case HImode:
43813 case QImode:
43814 break;
43816 default:
43817 gcc_unreachable ();
43820 /* Avoid overflows. */
43821 if (mode_signbit_p (mode, *loc))
43822 return false;
43824 val = INTVAL (*loc);
43826 /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
43827 Exception: -128 encodes smaller than 128, so swap the sign and the op. */
43828 if ((val < 0 && val != -128)
43829 || val == 128)
43831 *loc = GEN_INT (-val);
43832 return true;
43835 return false;
43838 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43839 optabs would emit if we didn't have TFmode patterns. */
43841 void
43842 x86_emit_floatuns (rtx operands[2])
43844 rtx_code_label *neglab, *donelab;
43845 rtx i0, i1, f0, in, out;
43846 machine_mode mode, inmode;
43848 inmode = GET_MODE (operands[1]);
43849 gcc_assert (inmode == SImode || inmode == DImode);
43851 out = operands[0];
43852 in = force_reg (inmode, operands[1]);
43853 mode = GET_MODE (out);
43854 neglab = gen_label_rtx ();
43855 donelab = gen_label_rtx ();
43856 f0 = gen_reg_rtx (mode);
43858 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43860 expand_float (out, in, 0);
43862 emit_jump_insn (gen_jump (donelab));
43863 emit_barrier ();
43865 emit_label (neglab);
43867 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43868 1, OPTAB_DIRECT);
43869 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43870 1, OPTAB_DIRECT);
43871 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43873 expand_float (f0, i0, 0);
43875 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
43877 emit_label (donelab);
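/* Illustrative sketch, not part of the original i386.c: the same
   unsigned-to-float trick written in plain C for a hypothetical
   uint64 -> double conversion.  Values in the signed range convert
   directly; otherwise halve the input (folding the low bit back in so
   rounding is not biased), convert, and double the result.  */

static double
example_floatuns (unsigned long long u)
{
  unsigned long long half;
  double d;

  if ((long long) u >= 0)
    return (double) (long long) u;

  half = (u >> 1) | (u & 1);
  d = (double) (long long) half;
  return d + d;
}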
43880 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43881 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43882 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43883 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43885 /* Get a vector mode of the same size as the original but with elements
43886 twice as wide. This is only guaranteed to apply to integral vectors. */
43888 static inline machine_mode
43889 get_mode_wider_vector (machine_mode o)
43891 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43892 machine_mode n = GET_MODE_WIDER_MODE (o);
43893 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43894 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43895 return n;
43898 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43899 fill target with val via vec_duplicate. */
43901 static bool
43902 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43904 bool ok;
43905 rtx_insn *insn;
43906 rtx dup;
43908 /* First attempt to recognize VAL as-is. */
43909 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43910 insn = emit_insn (gen_rtx_SET (target, dup));
43911 if (recog_memoized (insn) < 0)
43913 rtx_insn *seq;
43914 /* If that fails, force VAL into a register. */
43916 start_sequence ();
43917 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43918 seq = get_insns ();
43919 end_sequence ();
43920 if (seq)
43921 emit_insn_before (seq, insn);
43923 ok = recog_memoized (insn) >= 0;
43924 gcc_assert (ok);
43926 return true;
43929 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43930 with all elements equal to VAR. Return true if successful. */
43932 static bool
43933 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43934 rtx target, rtx val)
43936 bool ok;
43938 switch (mode)
43940 case V2SImode:
43941 case V2SFmode:
43942 if (!mmx_ok)
43943 return false;
43944 /* FALLTHRU */
43946 case V4DFmode:
43947 case V4DImode:
43948 case V8SFmode:
43949 case V8SImode:
43950 case V2DFmode:
43951 case V2DImode:
43952 case V4SFmode:
43953 case V4SImode:
43954 case V16SImode:
43955 case V8DImode:
43956 case V16SFmode:
43957 case V8DFmode:
43958 return ix86_vector_duplicate_value (mode, target, val);
43960 case V4HImode:
43961 if (!mmx_ok)
43962 return false;
43963 if (TARGET_SSE || TARGET_3DNOW_A)
43965 rtx x;
43967 val = gen_lowpart (SImode, val);
43968 x = gen_rtx_TRUNCATE (HImode, val);
43969 x = gen_rtx_VEC_DUPLICATE (mode, x);
43970 emit_insn (gen_rtx_SET (target, x));
43971 return true;
43973 goto widen;
43975 case V8QImode:
43976 if (!mmx_ok)
43977 return false;
43978 goto widen;
43980 case V8HImode:
43981 if (TARGET_AVX2)
43982 return ix86_vector_duplicate_value (mode, target, val);
43984 if (TARGET_SSE2)
43986 struct expand_vec_perm_d dperm;
43987 rtx tmp1, tmp2;
43989 permute:
43990 memset (&dperm, 0, sizeof (dperm));
43991 dperm.target = target;
43992 dperm.vmode = mode;
43993 dperm.nelt = GET_MODE_NUNITS (mode);
43994 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
43995 dperm.one_operand_p = true;
43997 /* Extend to SImode using a paradoxical SUBREG. */
43998 tmp1 = gen_reg_rtx (SImode);
43999 emit_move_insn (tmp1, gen_lowpart (SImode, val));
44001 /* Insert the SImode value as low element of a V4SImode vector. */
44002 tmp2 = gen_reg_rtx (V4SImode);
44003 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
44004 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
44006 ok = (expand_vec_perm_1 (&dperm)
44007 || expand_vec_perm_broadcast_1 (&dperm));
44008 gcc_assert (ok);
44009 return ok;
44011 goto widen;
44013 case V16QImode:
44014 if (TARGET_AVX2)
44015 return ix86_vector_duplicate_value (mode, target, val);
44017 if (TARGET_SSE2)
44018 goto permute;
44019 goto widen;
44021 widen:
44022 /* Replicate the value once into the next wider mode and recurse. */
44024 machine_mode smode, wsmode, wvmode;
44025 rtx x;
44027 smode = GET_MODE_INNER (mode);
44028 wvmode = get_mode_wider_vector (mode);
44029 wsmode = GET_MODE_INNER (wvmode);
44031 val = convert_modes (wsmode, smode, val, true);
44032 x = expand_simple_binop (wsmode, ASHIFT, val,
44033 GEN_INT (GET_MODE_BITSIZE (smode)),
44034 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44035 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
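/* For example, when widening V16QImode a QImode VAL of 0xab becomes the
   HImode value 0xabab, which is then broadcast across V8HImode and
   reinterpreted in the original mode.  */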
44037 x = gen_reg_rtx (wvmode);
44038 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
44039 gcc_assert (ok);
44040 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
44041 return ok;
44044 case V16HImode:
44045 case V32QImode:
44046 if (TARGET_AVX2)
44047 return ix86_vector_duplicate_value (mode, target, val);
44048 else
44050 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
44051 rtx x = gen_reg_rtx (hvmode);
44053 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44054 gcc_assert (ok);
44056 x = gen_rtx_VEC_CONCAT (mode, x, x);
44057 emit_insn (gen_rtx_SET (target, x));
44059 return true;
44061 case V64QImode:
44062 case V32HImode:
44063 if (TARGET_AVX512BW)
44064 return ix86_vector_duplicate_value (mode, target, val);
44065 else
44067 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
44068 rtx x = gen_reg_rtx (hvmode);
44070 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44071 gcc_assert (ok);
44073 x = gen_rtx_VEC_CONCAT (mode, x, x);
44074 emit_insn (gen_rtx_SET (target, x));
44076 return true;
44078 default:
44079 return false;
44083 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44084 whose ONE_VAR element is VAR, and other elements are zero. Return true
44085 if successful. */
44087 static bool
44088 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
44089 rtx target, rtx var, int one_var)
44091 machine_mode vsimode;
44092 rtx new_target;
44093 rtx x, tmp;
44094 bool use_vector_set = false;
44096 switch (mode)
44098 case V2DImode:
44099 /* For SSE4.1, we normally use vector set. But if the second
44100 element is zero and inter-unit moves are OK, we use movq
44101 instead. */
44102 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
44103 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
44104 && one_var == 0));
44105 break;
44106 case V16QImode:
44107 case V4SImode:
44108 case V4SFmode:
44109 use_vector_set = TARGET_SSE4_1;
44110 break;
44111 case V8HImode:
44112 use_vector_set = TARGET_SSE2;
44113 break;
44114 case V4HImode:
44115 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
44116 break;
44117 case V32QImode:
44118 case V16HImode:
44119 case V8SImode:
44120 case V8SFmode:
44121 case V4DFmode:
44122 use_vector_set = TARGET_AVX;
44123 break;
44124 case V4DImode:
44125 /* Use ix86_expand_vector_set in 64bit mode only. */
44126 use_vector_set = TARGET_AVX && TARGET_64BIT;
44127 break;
44128 default:
44129 break;
44132 if (use_vector_set)
44134 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
44135 var = force_reg (GET_MODE_INNER (mode), var);
44136 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44137 return true;
44140 switch (mode)
44142 case V2SFmode:
44143 case V2SImode:
44144 if (!mmx_ok)
44145 return false;
44146 /* FALLTHRU */
44148 case V2DFmode:
44149 case V2DImode:
44150 if (one_var != 0)
44151 return false;
44152 var = force_reg (GET_MODE_INNER (mode), var);
44153 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
44154 emit_insn (gen_rtx_SET (target, x));
44155 return true;
44157 case V4SFmode:
44158 case V4SImode:
44159 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
44160 new_target = gen_reg_rtx (mode);
44161 else
44162 new_target = target;
44163 var = force_reg (GET_MODE_INNER (mode), var);
44164 x = gen_rtx_VEC_DUPLICATE (mode, var);
44165 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
44166 emit_insn (gen_rtx_SET (new_target, x));
44167 if (one_var != 0)
44169 /* We need to shuffle the value to the correct position, so
44170 create a new pseudo to store the intermediate result. */
44172 /* With SSE2, we can use the integer shuffle insns. */
44173 if (mode != V4SFmode && TARGET_SSE2)
44175 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
44176 const1_rtx,
44177 GEN_INT (one_var == 1 ? 0 : 1),
44178 GEN_INT (one_var == 2 ? 0 : 1),
44179 GEN_INT (one_var == 3 ? 0 : 1)));
44180 if (target != new_target)
44181 emit_move_insn (target, new_target);
44182 return true;
44185 /* Otherwise convert the intermediate result to V4SFmode and
44186 use the SSE1 shuffle instructions. */
44187 if (mode != V4SFmode)
44189 tmp = gen_reg_rtx (V4SFmode);
44190 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
44192 else
44193 tmp = new_target;
44195 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44196 const1_rtx,
44197 GEN_INT (one_var == 1 ? 0 : 1),
44198 GEN_INT (one_var == 2 ? 0+4 : 1+4),
44199 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44201 if (mode != V4SFmode)
44202 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44203 else if (tmp != target)
44204 emit_move_insn (target, tmp);
44206 else if (target != new_target)
44207 emit_move_insn (target, new_target);
44208 return true;
44210 case V8HImode:
44211 case V16QImode:
44212 vsimode = V4SImode;
44213 goto widen;
44214 case V4HImode:
44215 case V8QImode:
44216 if (!mmx_ok)
44217 return false;
44218 vsimode = V2SImode;
44219 goto widen;
44220 widen:
44221 if (one_var != 0)
44222 return false;
44224 /* Zero extend the variable element to SImode and recurse. */
44225 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44227 x = gen_reg_rtx (vsimode);
44228 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44229 var, one_var))
44230 gcc_unreachable ();
44232 emit_move_insn (target, gen_lowpart (mode, x));
44233 return true;
44235 default:
44236 return false;
44240 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44241 consisting of the values in VALS. It is known that all elements
44242 except ONE_VAR are constants. Return true if successful. */
44244 static bool
44245 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44246 rtx target, rtx vals, int one_var)
44248 rtx var = XVECEXP (vals, 0, one_var);
44249 machine_mode wmode;
44250 rtx const_vec, x;
44252 const_vec = copy_rtx (vals);
44253 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44254 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44256 switch (mode)
44258 case V2DFmode:
44259 case V2DImode:
44260 case V2SFmode:
44261 case V2SImode:
44262 /* For the two element vectors, it's just as easy to use
44263 the general case. */
44264 return false;
44266 case V4DImode:
44267 /* Use ix86_expand_vector_set in 64bit mode only. */
44268 if (!TARGET_64BIT)
44269 return false;
44270 case V4DFmode:
44271 case V8SFmode:
44272 case V8SImode:
44273 case V16HImode:
44274 case V32QImode:
44275 case V4SFmode:
44276 case V4SImode:
44277 case V8HImode:
44278 case V4HImode:
44279 break;
44281 case V16QImode:
44282 if (TARGET_SSE4_1)
44283 break;
44284 wmode = V8HImode;
44285 goto widen;
44286 case V8QImode:
44287 wmode = V4HImode;
44288 goto widen;
44289 widen:
44290 /* There's no way to set one QImode entry easily. Combine
44291 the variable value with its adjacent constant value, and
44292 promote to an HImode set. */
44293 x = XVECEXP (vals, 0, one_var ^ 1);
44294 if (one_var & 1)
44296 var = convert_modes (HImode, QImode, var, true);
44297 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44298 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44299 x = GEN_INT (INTVAL (x) & 0xff);
44301 else
44303 var = convert_modes (HImode, QImode, var, true);
44304 x = gen_int_mode (INTVAL (x) << 8, HImode);
44306 if (x != const0_rtx)
44307 var = expand_simple_binop (HImode, IOR, var, x, var,
44308 1, OPTAB_LIB_WIDEN);
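/* For example, with ONE_VAR == 5 in V16QImode the constant at index 4 is
   the adjacent value: VAR is shifted into the high byte, the constant
   keeps the low byte, and the combined HImode value is stored as element
   2 (ONE_VAR >> 1) of the V8HImode copy.  */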
44310 x = gen_reg_rtx (wmode);
44311 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44312 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44314 emit_move_insn (target, gen_lowpart (mode, x));
44315 return true;
44317 default:
44318 return false;
44321 emit_move_insn (target, const_vec);
44322 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44323 return true;
44326 /* A subroutine of ix86_expand_vector_init_general. Use vector
44327 concatenate to handle the most general case: all values variable,
44328 and none identical. */
44330 static void
44331 ix86_expand_vector_init_concat (machine_mode mode,
44332 rtx target, rtx *ops, int n)
44334 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44335 rtx first[16], second[8], third[4];
44336 rtvec v;
44337 int i, j;
44339 switch (n)
44341 case 2:
44342 switch (mode)
44344 case V16SImode:
44345 cmode = V8SImode;
44346 break;
44347 case V16SFmode:
44348 cmode = V8SFmode;
44349 break;
44350 case V8DImode:
44351 cmode = V4DImode;
44352 break;
44353 case V8DFmode:
44354 cmode = V4DFmode;
44355 break;
44356 case V8SImode:
44357 cmode = V4SImode;
44358 break;
44359 case V8SFmode:
44360 cmode = V4SFmode;
44361 break;
44362 case V4DImode:
44363 cmode = V2DImode;
44364 break;
44365 case V4DFmode:
44366 cmode = V2DFmode;
44367 break;
44368 case V4SImode:
44369 cmode = V2SImode;
44370 break;
44371 case V4SFmode:
44372 cmode = V2SFmode;
44373 break;
44374 case V2DImode:
44375 cmode = DImode;
44376 break;
44377 case V2SImode:
44378 cmode = SImode;
44379 break;
44380 case V2DFmode:
44381 cmode = DFmode;
44382 break;
44383 case V2SFmode:
44384 cmode = SFmode;
44385 break;
44386 default:
44387 gcc_unreachable ();
44390 if (!register_operand (ops[1], cmode))
44391 ops[1] = force_reg (cmode, ops[1]);
44392 if (!register_operand (ops[0], cmode))
44393 ops[0] = force_reg (cmode, ops[0]);
44394 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
44395 ops[1])));
44396 break;
44398 case 4:
44399 switch (mode)
44401 case V4DImode:
44402 cmode = V2DImode;
44403 break;
44404 case V4DFmode:
44405 cmode = V2DFmode;
44406 break;
44407 case V4SImode:
44408 cmode = V2SImode;
44409 break;
44410 case V4SFmode:
44411 cmode = V2SFmode;
44412 break;
44413 default:
44414 gcc_unreachable ();
44416 goto half;
44418 case 8:
44419 switch (mode)
44421 case V8DImode:
44422 cmode = V2DImode;
44423 hmode = V4DImode;
44424 break;
44425 case V8DFmode:
44426 cmode = V2DFmode;
44427 hmode = V4DFmode;
44428 break;
44429 case V8SImode:
44430 cmode = V2SImode;
44431 hmode = V4SImode;
44432 break;
44433 case V8SFmode:
44434 cmode = V2SFmode;
44435 hmode = V4SFmode;
44436 break;
44437 default:
44438 gcc_unreachable ();
44440 goto half;
44442 case 16:
44443 switch (mode)
44445 case V16SImode:
44446 cmode = V2SImode;
44447 hmode = V4SImode;
44448 gmode = V8SImode;
44449 break;
44450 case V16SFmode:
44451 cmode = V2SFmode;
44452 hmode = V4SFmode;
44453 gmode = V8SFmode;
44454 break;
44455 default:
44456 gcc_unreachable ();
44458 goto half;
44460 half:
44461 /* FIXME: We process inputs backward to help RA. PR 36222. */
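/* For example, with n == 4 the pairs (ops[2], ops[3]) and (ops[0], ops[1])
   become first[1] and first[0], which the recursive call below then
   concatenates into TARGET.  */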
44462 i = n - 1;
44463 j = (n >> 1) - 1;
44464 for (; i > 0; i -= 2, j--)
44466 first[j] = gen_reg_rtx (cmode);
44467 v = gen_rtvec (2, ops[i - 1], ops[i]);
44468 ix86_expand_vector_init (false, first[j],
44469 gen_rtx_PARALLEL (cmode, v));
44472 n >>= 1;
44473 if (n > 4)
44475 gcc_assert (hmode != VOIDmode);
44476 gcc_assert (gmode != VOIDmode);
44477 for (i = j = 0; i < n; i += 2, j++)
44479 second[j] = gen_reg_rtx (hmode);
44480 ix86_expand_vector_init_concat (hmode, second [j],
44481 &first [i], 2);
44483 n >>= 1;
44484 for (i = j = 0; i < n; i += 2, j++)
44486 third[j] = gen_reg_rtx (gmode);
44487 ix86_expand_vector_init_concat (gmode, third[j],
44488 &second[i], 2);
44490 n >>= 1;
44491 ix86_expand_vector_init_concat (mode, target, third, n);
44493 else if (n > 2)
44495 gcc_assert (hmode != VOIDmode);
44496 for (i = j = 0; i < n; i += 2, j++)
44498 second[j] = gen_reg_rtx (hmode);
44499 ix86_expand_vector_init_concat (hmode, second [j],
44500 &first [i], 2);
44502 n >>= 1;
44503 ix86_expand_vector_init_concat (mode, target, second, n);
44505 else
44506 ix86_expand_vector_init_concat (mode, target, first, n);
44507 break;
44509 default:
44510 gcc_unreachable ();
44514 /* A subroutine of ix86_expand_vector_init_general. Use vector
44515 interleave to handle the most general case: all values variable,
44516 and none identical. */
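/* Each of the N iterations of the first loop below packs the pair
   ops[2*i] and ops[2*i + 1] into one vector register; the subsequent
   interleave steps then merge those N registers into TARGET.  */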
44518 static void
44519 ix86_expand_vector_init_interleave (machine_mode mode,
44520 rtx target, rtx *ops, int n)
44522 machine_mode first_imode, second_imode, third_imode, inner_mode;
44523 int i, j;
44524 rtx op0, op1;
44525 rtx (*gen_load_even) (rtx, rtx, rtx);
44526 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44527 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44529 switch (mode)
44531 case V8HImode:
44532 gen_load_even = gen_vec_setv8hi;
44533 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44534 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44535 inner_mode = HImode;
44536 first_imode = V4SImode;
44537 second_imode = V2DImode;
44538 third_imode = VOIDmode;
44539 break;
44540 case V16QImode:
44541 gen_load_even = gen_vec_setv16qi;
44542 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44543 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44544 inner_mode = QImode;
44545 first_imode = V8HImode;
44546 second_imode = V4SImode;
44547 third_imode = V2DImode;
44548 break;
44549 default:
44550 gcc_unreachable ();
44553 for (i = 0; i < n; i++)
44555 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44556 op0 = gen_reg_rtx (SImode);
44557 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44559 /* Insert the SImode value as low element of V4SImode vector. */
44560 op1 = gen_reg_rtx (V4SImode);
44561 op0 = gen_rtx_VEC_MERGE (V4SImode,
44562 gen_rtx_VEC_DUPLICATE (V4SImode,
44563 op0),
44564 CONST0_RTX (V4SImode),
44565 const1_rtx);
44566 emit_insn (gen_rtx_SET (op1, op0));
44568 /* Cast the V4SImode vector back to a vector in the original mode. */
44569 op0 = gen_reg_rtx (mode);
44570 emit_move_insn (op0, gen_lowpart (mode, op1));
44572 /* Load even elements into the second position. */
44573 emit_insn (gen_load_even (op0,
44574 force_reg (inner_mode,
44575 ops [i + i + 1]),
44576 const1_rtx));
44578 /* Cast vector to FIRST_IMODE vector. */
44579 ops[i] = gen_reg_rtx (first_imode);
44580 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44583 /* Interleave low FIRST_IMODE vectors. */
44584 for (i = j = 0; i < n; i += 2, j++)
44586 op0 = gen_reg_rtx (first_imode);
44587 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44589 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44590 ops[j] = gen_reg_rtx (second_imode);
44591 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44594 /* Interleave low SECOND_IMODE vectors. */
44595 switch (second_imode)
44597 case V4SImode:
44598 for (i = j = 0; i < n / 2; i += 2, j++)
44600 op0 = gen_reg_rtx (second_imode);
44601 emit_insn (gen_interleave_second_low (op0, ops[i],
44602 ops[i + 1]));
44604 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44605 vector. */
44606 ops[j] = gen_reg_rtx (third_imode);
44607 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44609 second_imode = V2DImode;
44610 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44611 /* FALLTHRU */
44613 case V2DImode:
44614 op0 = gen_reg_rtx (second_imode);
44615 emit_insn (gen_interleave_second_low (op0, ops[0],
44616 ops[1]));
44618 /* Cast the SECOND_IMODE vector back to a vector in the original
44619 mode. */
44620 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
44621 break;
44623 default:
44624 gcc_unreachable ();
44628 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44629 all values variable, and none identical. */
44631 static void
44632 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44633 rtx target, rtx vals)
44635 rtx ops[64], op0, op1, op2, op3, op4, op5;
44636 machine_mode half_mode = VOIDmode;
44637 machine_mode quarter_mode = VOIDmode;
44638 int n, i;
44640 switch (mode)
44642 case V2SFmode:
44643 case V2SImode:
44644 if (!mmx_ok && !TARGET_SSE)
44645 break;
44646 /* FALLTHRU */
44648 case V16SImode:
44649 case V16SFmode:
44650 case V8DFmode:
44651 case V8DImode:
44652 case V8SFmode:
44653 case V8SImode:
44654 case V4DFmode:
44655 case V4DImode:
44656 case V4SFmode:
44657 case V4SImode:
44658 case V2DFmode:
44659 case V2DImode:
44660 n = GET_MODE_NUNITS (mode);
44661 for (i = 0; i < n; i++)
44662 ops[i] = XVECEXP (vals, 0, i);
44663 ix86_expand_vector_init_concat (mode, target, ops, n);
44664 return;
44666 case V32QImode:
44667 half_mode = V16QImode;
44668 goto half;
44670 case V16HImode:
44671 half_mode = V8HImode;
44672 goto half;
44674 half:
44675 n = GET_MODE_NUNITS (mode);
44676 for (i = 0; i < n; i++)
44677 ops[i] = XVECEXP (vals, 0, i);
44678 op0 = gen_reg_rtx (half_mode);
44679 op1 = gen_reg_rtx (half_mode);
44680 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44681 n >> 2);
44682 ix86_expand_vector_init_interleave (half_mode, op1,
44683 &ops [n >> 1], n >> 2);
44684 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
44685 return;
44687 case V64QImode:
44688 quarter_mode = V16QImode;
44689 half_mode = V32QImode;
44690 goto quarter;
44692 case V32HImode:
44693 quarter_mode = V8HImode;
44694 half_mode = V16HImode;
44695 goto quarter;
44697 quarter:
44698 n = GET_MODE_NUNITS (mode);
44699 for (i = 0; i < n; i++)
44700 ops[i] = XVECEXP (vals, 0, i);
44701 op0 = gen_reg_rtx (quarter_mode);
44702 op1 = gen_reg_rtx (quarter_mode);
44703 op2 = gen_reg_rtx (quarter_mode);
44704 op3 = gen_reg_rtx (quarter_mode);
44705 op4 = gen_reg_rtx (half_mode);
44706 op5 = gen_reg_rtx (half_mode);
44707 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44708 n >> 3);
44709 ix86_expand_vector_init_interleave (quarter_mode, op1,
44710 &ops [n >> 2], n >> 3);
44711 ix86_expand_vector_init_interleave (quarter_mode, op2,
44712 &ops [n >> 1], n >> 3);
44713 ix86_expand_vector_init_interleave (quarter_mode, op3,
44714 &ops [(n >> 1) | (n >> 2)], n >> 3);
44715 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44716 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44717 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
44718 return;
44720 case V16QImode:
44721 if (!TARGET_SSE4_1)
44722 break;
44723 /* FALLTHRU */
44725 case V8HImode:
44726 if (!TARGET_SSE2)
44727 break;
44729 /* Don't use ix86_expand_vector_init_interleave if we can't
44730 move from GPR to SSE register directly. */
44731 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44732 break;
44734 n = GET_MODE_NUNITS (mode);
44735 for (i = 0; i < n; i++)
44736 ops[i] = XVECEXP (vals, 0, i);
44737 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44738 return;
44740 case V4HImode:
44741 case V8QImode:
44742 break;
44744 default:
44745 gcc_unreachable ();
44749 int i, j, n_elts, n_words, n_elt_per_word;
44750 machine_mode inner_mode;
44751 rtx words[4], shift;
44753 inner_mode = GET_MODE_INNER (mode);
44754 n_elts = GET_MODE_NUNITS (mode);
44755 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44756 n_elt_per_word = n_elts / n_words;
44757 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
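/* For example, for V4HImode with a 32-bit word_mode this builds
   words[0] = (elt1 << 16) | elt0 and words[1] = (elt3 << 16) | elt2,
   so the lowest-numbered element always lands in the low bits of
   its word.  */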
44759 for (i = 0; i < n_words; ++i)
44761 rtx word = NULL_RTX;
44763 for (j = 0; j < n_elt_per_word; ++j)
44765 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44766 elt = convert_modes (word_mode, inner_mode, elt, true);
44768 if (j == 0)
44769 word = elt;
44770 else
44772 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44773 word, 1, OPTAB_LIB_WIDEN);
44774 word = expand_simple_binop (word_mode, IOR, word, elt,
44775 word, 1, OPTAB_LIB_WIDEN);
44779 words[i] = word;
44782 if (n_words == 1)
44783 emit_move_insn (target, gen_lowpart (mode, words[0]));
44784 else if (n_words == 2)
44786 rtx tmp = gen_reg_rtx (mode);
44787 emit_clobber (tmp);
44788 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44789 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44790 emit_move_insn (target, tmp);
44792 else if (n_words == 4)
44794 rtx tmp = gen_reg_rtx (V4SImode);
44795 gcc_assert (word_mode == SImode);
44796 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44797 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44798 emit_move_insn (target, gen_lowpart (mode, tmp));
44800 else
44801 gcc_unreachable ();
44805 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44806 instructions unless MMX_OK is true. */
44808 void
44809 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44811 machine_mode mode = GET_MODE (target);
44812 machine_mode inner_mode = GET_MODE_INNER (mode);
44813 int n_elts = GET_MODE_NUNITS (mode);
44814 int n_var = 0, one_var = -1;
44815 bool all_same = true, all_const_zero = true;
44816 int i;
44817 rtx x;
44819 for (i = 0; i < n_elts; ++i)
44821 x = XVECEXP (vals, 0, i);
44822 if (!(CONST_SCALAR_INT_P (x)
44823 || CONST_DOUBLE_P (x)
44824 || CONST_FIXED_P (x)))
44825 n_var++, one_var = i;
44826 else if (x != CONST0_RTX (inner_mode))
44827 all_const_zero = false;
44828 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44829 all_same = false;
44832 /* Constants are best loaded from the constant pool. */
44833 if (n_var == 0)
44835 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44836 return;
44839 /* If all values are identical, broadcast the value. */
44840 if (all_same
44841 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44842 XVECEXP (vals, 0, 0)))
44843 return;
44845 /* Values where only one field is non-constant are best loaded from
44846 the pool and overwritten via move later. */
44847 if (n_var == 1)
44849 if (all_const_zero
44850 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44851 XVECEXP (vals, 0, one_var),
44852 one_var))
44853 return;
44855 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44856 return;
44859 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44862 void
44863 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44865 machine_mode mode = GET_MODE (target);
44866 machine_mode inner_mode = GET_MODE_INNER (mode);
44867 machine_mode half_mode;
44868 bool use_vec_merge = false;
44869 rtx tmp;
44870 static rtx (*gen_extract[6][2]) (rtx, rtx)
44872 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44873 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44874 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44875 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44876 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44877 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44879 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44881 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44882 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44883 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44884 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44885 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44886 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44888 int i, j, n;
44889 machine_mode mmode = VOIDmode;
44890 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
44892 switch (mode)
44894 case V2SFmode:
44895 case V2SImode:
44896 if (mmx_ok)
44898 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44899 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44900 if (elt == 0)
44901 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44902 else
44903 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44904 emit_insn (gen_rtx_SET (target, tmp));
44905 return;
44907 break;
44909 case V2DImode:
44910 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44911 if (use_vec_merge)
44912 break;
44914 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44915 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44916 if (elt == 0)
44917 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44918 else
44919 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44920 emit_insn (gen_rtx_SET (target, tmp));
44921 return;
44923 case V2DFmode:
44925 rtx op0, op1;
44927 /* For the two element vectors, we implement a VEC_CONCAT with
44928 the extraction of the other element. */
44930 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44931 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44933 if (elt == 0)
44934 op0 = val, op1 = tmp;
44935 else
44936 op0 = tmp, op1 = val;
44938 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44939 emit_insn (gen_rtx_SET (target, tmp));
44941 return;
44943 case V4SFmode:
44944 use_vec_merge = TARGET_SSE4_1;
44945 if (use_vec_merge)
44946 break;
44948 switch (elt)
44950 case 0:
44951 use_vec_merge = true;
44952 break;
44954 case 1:
44955 /* tmp = target = A B C D */
44956 tmp = copy_to_reg (target);
44957 /* target = A A B B */
44958 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44959 /* target = X A B B */
44960 ix86_expand_vector_set (false, target, val, 0);
44961 /* target = A X C D */
44962 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44963 const1_rtx, const0_rtx,
44964 GEN_INT (2+4), GEN_INT (3+4)));
44965 return;
44967 case 2:
44968 /* tmp = target = A B C D */
44969 tmp = copy_to_reg (target);
44970 /* tmp = X B C D */
44971 ix86_expand_vector_set (false, tmp, val, 0);
44972 /* target = A B X D */
44973 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44974 const0_rtx, const1_rtx,
44975 GEN_INT (0+4), GEN_INT (3+4)));
44976 return;
44978 case 3:
44979 /* tmp = target = A B C D */
44980 tmp = copy_to_reg (target);
44981 /* tmp = X B C D */
44982 ix86_expand_vector_set (false, tmp, val, 0);
44983 /* target = A B C X */
44984 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
44985 const0_rtx, const1_rtx,
44986 GEN_INT (2+4), GEN_INT (0+4)));
44987 return;
44989 default:
44990 gcc_unreachable ();
44992 break;
44994 case V4SImode:
44995 use_vec_merge = TARGET_SSE4_1;
44996 if (use_vec_merge)
44997 break;
44999 /* Element 0 handled by vec_merge below. */
45000 if (elt == 0)
45002 use_vec_merge = true;
45003 break;
45006 if (TARGET_SSE2)
45008 /* With SSE2, use integer shuffles to swap element 0 and ELT,
45009 store into element 0, then shuffle them back. */
45011 rtx order[4];
45013 order[0] = GEN_INT (elt);
45014 order[1] = const1_rtx;
45015 order[2] = const2_rtx;
45016 order[3] = GEN_INT (3);
45017 order[elt] = const0_rtx;
45019 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45020 order[1], order[2], order[3]));
45022 ix86_expand_vector_set (false, target, val, 0);
45024 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45025 order[1], order[2], order[3]));
45027 else
45029 /* For SSE1, we have to reuse the V4SF code. */
45030 rtx t = gen_reg_rtx (V4SFmode);
45031 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
45032 emit_move_insn (target, gen_lowpart (mode, t));
45034 return;
45036 case V8HImode:
45037 use_vec_merge = TARGET_SSE2;
45038 break;
45039 case V4HImode:
45040 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45041 break;
45043 case V16QImode:
45044 use_vec_merge = TARGET_SSE4_1;
45045 break;
45047 case V8QImode:
45048 break;
45050 case V32QImode:
45051 half_mode = V16QImode;
45052 j = 0;
45053 n = 16;
45054 goto half;
45056 case V16HImode:
45057 half_mode = V8HImode;
45058 j = 1;
45059 n = 8;
45060 goto half;
45062 case V8SImode:
45063 half_mode = V4SImode;
45064 j = 2;
45065 n = 4;
45066 goto half;
45068 case V4DImode:
45069 half_mode = V2DImode;
45070 j = 3;
45071 n = 2;
45072 goto half;
45074 case V8SFmode:
45075 half_mode = V4SFmode;
45076 j = 4;
45077 n = 4;
45078 goto half;
45080 case V4DFmode:
45081 half_mode = V2DFmode;
45082 j = 5;
45083 n = 2;
45084 goto half;
45086 half:
45087 /* Compute offset. */
45088 i = elt / n;
45089 elt %= n;
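/* For example, setting element 5 of a V8SImode vector: n is 4, so the
   high 128-bit half (i == 1) is extracted, element 1 of that V4SImode
   half is replaced, and the half is inserted back.  */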
45091 gcc_assert (i <= 1);
45093 /* Extract the half. */
45094 tmp = gen_reg_rtx (half_mode);
45095 emit_insn (gen_extract[j][i] (tmp, target));
45097 /* Put val in tmp at elt. */
45098 ix86_expand_vector_set (false, tmp, val, elt);
45100 /* Put it back. */
45101 emit_insn (gen_insert[j][i] (target, target, tmp));
45102 return;
45104 case V8DFmode:
45105 if (TARGET_AVX512F)
45107 mmode = QImode;
45108 gen_blendm = gen_avx512f_blendmv8df;
45110 break;
45112 case V8DImode:
45113 if (TARGET_AVX512F)
45115 mmode = QImode;
45116 gen_blendm = gen_avx512f_blendmv8di;
45118 break;
45120 case V16SFmode:
45121 if (TARGET_AVX512F)
45123 mmode = HImode;
45124 gen_blendm = gen_avx512f_blendmv16sf;
45126 break;
45128 case V16SImode:
45129 if (TARGET_AVX512F)
45131 mmode = HImode;
45132 gen_blendm = gen_avx512f_blendmv16si;
45134 break;
45136 case V32HImode:
45137 if (TARGET_AVX512F && TARGET_AVX512BW)
45139 mmode = SImode;
45140 gen_blendm = gen_avx512bw_blendmv32hi;
45142 break;
45144 case V64QImode:
45145 if (TARGET_AVX512F && TARGET_AVX512BW)
45147 mmode = DImode;
45148 gen_blendm = gen_avx512bw_blendmv64qi;
45150 break;
45152 default:
45153 break;
45156 if (mmode != VOIDmode)
45158 tmp = gen_reg_rtx (mode);
45159 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
45160 emit_insn (gen_blendm (target, tmp, target,
45161 force_reg (mmode,
45162 gen_int_mode (1 << elt, mmode))));
45164 else if (use_vec_merge)
45166 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45167 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45168 emit_insn (gen_rtx_SET (target, tmp));
45170 else
45172 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45174 emit_move_insn (mem, target);
45176 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45177 emit_move_insn (tmp, val);
45179 emit_move_insn (target, mem);
45183 void
45184 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45186 machine_mode mode = GET_MODE (vec);
45187 machine_mode inner_mode = GET_MODE_INNER (mode);
45188 bool use_vec_extr = false;
45189 rtx tmp;
45191 switch (mode)
45193 case V2SImode:
45194 case V2SFmode:
45195 if (!mmx_ok)
45196 break;
45197 /* FALLTHRU */
45199 case V2DFmode:
45200 case V2DImode:
45201 use_vec_extr = true;
45202 break;
45204 case V4SFmode:
45205 use_vec_extr = TARGET_SSE4_1;
45206 if (use_vec_extr)
45207 break;
45209 switch (elt)
45211 case 0:
45212 tmp = vec;
45213 break;
45215 case 1:
45216 case 3:
45217 tmp = gen_reg_rtx (mode);
45218 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45219 GEN_INT (elt), GEN_INT (elt),
45220 GEN_INT (elt+4), GEN_INT (elt+4)));
45221 break;
45223 case 2:
45224 tmp = gen_reg_rtx (mode);
45225 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45226 break;
45228 default:
45229 gcc_unreachable ();
45231 vec = tmp;
45232 use_vec_extr = true;
45233 elt = 0;
45234 break;
45236 case V4SImode:
45237 use_vec_extr = TARGET_SSE4_1;
45238 if (use_vec_extr)
45239 break;
45241 if (TARGET_SSE2)
45243 switch (elt)
45245 case 0:
45246 tmp = vec;
45247 break;
45249 case 1:
45250 case 3:
45251 tmp = gen_reg_rtx (mode);
45252 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45253 GEN_INT (elt), GEN_INT (elt),
45254 GEN_INT (elt), GEN_INT (elt)));
45255 break;
45257 case 2:
45258 tmp = gen_reg_rtx (mode);
45259 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45260 break;
45262 default:
45263 gcc_unreachable ();
45265 vec = tmp;
45266 use_vec_extr = true;
45267 elt = 0;
45269 else
45271 /* For SSE1, we have to reuse the V4SF code. */
45272 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45273 gen_lowpart (V4SFmode, vec), elt);
45274 return;
45276 break;
45278 case V8HImode:
45279 use_vec_extr = TARGET_SSE2;
45280 break;
45281 case V4HImode:
45282 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45283 break;
45285 case V16QImode:
45286 use_vec_extr = TARGET_SSE4_1;
45287 break;
45289 case V8SFmode:
45290 if (TARGET_AVX)
45292 tmp = gen_reg_rtx (V4SFmode);
45293 if (elt < 4)
45294 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45295 else
45296 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45297 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45298 return;
45300 break;
45302 case V4DFmode:
45303 if (TARGET_AVX)
45305 tmp = gen_reg_rtx (V2DFmode);
45306 if (elt < 2)
45307 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45308 else
45309 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45310 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45311 return;
45313 break;
45315 case V32QImode:
45316 if (TARGET_AVX)
45318 tmp = gen_reg_rtx (V16QImode);
45319 if (elt < 16)
45320 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45321 else
45322 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45323 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45324 return;
45326 break;
45328 case V16HImode:
45329 if (TARGET_AVX)
45331 tmp = gen_reg_rtx (V8HImode);
45332 if (elt < 8)
45333 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45334 else
45335 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45336 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45337 return;
45339 break;
45341 case V8SImode:
45342 if (TARGET_AVX)
45344 tmp = gen_reg_rtx (V4SImode);
45345 if (elt < 4)
45346 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45347 else
45348 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45349 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45350 return;
45352 break;
45354 case V4DImode:
45355 if (TARGET_AVX)
45357 tmp = gen_reg_rtx (V2DImode);
45358 if (elt < 2)
45359 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45360 else
45361 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45362 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45363 return;
45365 break;
45367 case V32HImode:
45368 if (TARGET_AVX512BW)
45370 tmp = gen_reg_rtx (V16HImode);
45371 if (elt < 16)
45372 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45373 else
45374 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45375 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45376 return;
45378 break;
45380 case V64QImode:
45381 if (TARGET_AVX512BW)
45383 tmp = gen_reg_rtx (V32QImode);
45384 if (elt < 32)
45385 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45386 else
45387 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45388 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45389 return;
45391 break;
45393 case V16SFmode:
45394 tmp = gen_reg_rtx (V8SFmode);
45395 if (elt < 8)
45396 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45397 else
45398 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45399 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45400 return;
45402 case V8DFmode:
45403 tmp = gen_reg_rtx (V4DFmode);
45404 if (elt < 4)
45405 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45406 else
45407 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45408 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45409 return;
45411 case V16SImode:
45412 tmp = gen_reg_rtx (V8SImode);
45413 if (elt < 8)
45414 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45415 else
45416 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45417 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45418 return;
45420 case V8DImode:
45421 tmp = gen_reg_rtx (V4DImode);
45422 if (elt < 4)
45423 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45424 else
45425 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45426 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45427 return;
45429 case V8QImode:
45430 /* ??? Could extract the appropriate HImode element and shift. */
45431 default:
45432 break;
45435 if (use_vec_extr)
45437 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45438 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45440 /* Let the rtl optimizers know about the zero extension performed. */
45441 if (inner_mode == QImode || inner_mode == HImode)
45443 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45444 target = gen_lowpart (SImode, target);
45447 emit_insn (gen_rtx_SET (target, tmp));
45449 else
45451 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45453 emit_move_insn (mem, vec);
45455 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45456 emit_move_insn (target, tmp);
45460 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45461 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45462 The upper bits of DEST are undefined, though they shouldn't cause
45463 exceptions (some bits from src or all zeros are ok). */
45465 static void
45466 emit_reduc_half (rtx dest, rtx src, int i)
45468 rtx tem, d = dest;
45469 switch (GET_MODE (src))
45471 case V4SFmode:
45472 if (i == 128)
45473 tem = gen_sse_movhlps (dest, src, src);
45474 else
45475 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45476 GEN_INT (1 + 4), GEN_INT (1 + 4));
45477 break;
45478 case V2DFmode:
45479 tem = gen_vec_interleave_highv2df (dest, src, src);
45480 break;
45481 case V16QImode:
45482 case V8HImode:
45483 case V4SImode:
45484 case V2DImode:
45485 d = gen_reg_rtx (V1TImode);
45486 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45487 GEN_INT (i / 2));
45488 break;
45489 case V8SFmode:
45490 if (i == 256)
45491 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45492 else
45493 tem = gen_avx_shufps256 (dest, src, src,
45494 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45495 break;
45496 case V4DFmode:
45497 if (i == 256)
45498 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45499 else
45500 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45501 break;
45502 case V32QImode:
45503 case V16HImode:
45504 case V8SImode:
45505 case V4DImode:
45506 if (i == 256)
45508 if (GET_MODE (dest) != V4DImode)
45509 d = gen_reg_rtx (V4DImode);
45510 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45511 gen_lowpart (V4DImode, src),
45512 const1_rtx);
45514 else
45516 d = gen_reg_rtx (V2TImode);
45517 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45518 GEN_INT (i / 2));
45520 break;
45521 case V64QImode:
45522 case V32HImode:
45523 case V16SImode:
45524 case V16SFmode:
45525 case V8DImode:
45526 case V8DFmode:
45527 if (i > 128)
45528 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45529 gen_lowpart (V16SImode, src),
45530 gen_lowpart (V16SImode, src),
45531 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45532 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45533 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45534 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45535 GEN_INT (0xC), GEN_INT (0xD),
45536 GEN_INT (0xE), GEN_INT (0xF),
45537 GEN_INT (0x10), GEN_INT (0x11),
45538 GEN_INT (0x12), GEN_INT (0x13),
45539 GEN_INT (0x14), GEN_INT (0x15),
45540 GEN_INT (0x16), GEN_INT (0x17));
45541 else
45542 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45543 gen_lowpart (V16SImode, src),
45544 GEN_INT (i == 128 ? 0x2 : 0x1),
45545 GEN_INT (0x3),
45546 GEN_INT (0x3),
45547 GEN_INT (0x3),
45548 GEN_INT (i == 128 ? 0x6 : 0x5),
45549 GEN_INT (0x7),
45550 GEN_INT (0x7),
45551 GEN_INT (0x7),
45552 GEN_INT (i == 128 ? 0xA : 0x9),
45553 GEN_INT (0xB),
45554 GEN_INT (0xB),
45555 GEN_INT (0xB),
45556 GEN_INT (i == 128 ? 0xE : 0xD),
45557 GEN_INT (0xF),
45558 GEN_INT (0xF),
45559 GEN_INT (0xF));
45560 break;
45561 default:
45562 gcc_unreachable ();
45564 emit_insn (tem);
45565 if (d != dest)
45566 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45569 /* Expand a vector reduction. FN is the binary pattern to reduce;
45570 DEST is the destination; IN is the input vector. */
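/* For example, a V4SImode reduction takes two rounds: the first combines
   the high and low 64-bit halves of IN, the second combines the remaining
   pair, leaving the fully reduced value in element 0 of DEST.  */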
45572 void
45573 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45575 rtx half, dst, vec = in;
45576 machine_mode mode = GET_MODE (in);
45577 int i;
45579 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45580 if (TARGET_SSE4_1
45581 && mode == V8HImode
45582 && fn == gen_uminv8hi3)
45584 emit_insn (gen_sse4_1_phminposuw (dest, in));
45585 return;
45588 for (i = GET_MODE_BITSIZE (mode);
45589 i > GET_MODE_UNIT_BITSIZE (mode);
45590 i >>= 1)
45592 half = gen_reg_rtx (mode);
45593 emit_reduc_half (half, vec, i);
45594 if (i == GET_MODE_UNIT_BITSIZE (mode) * 2)
45595 dst = dest;
45596 else
45597 dst = gen_reg_rtx (mode);
45598 emit_insn (fn (dst, half, vec));
45599 vec = dst;
45603 /* Target hook for scalar_mode_supported_p. */
45604 static bool
45605 ix86_scalar_mode_supported_p (machine_mode mode)
45607 if (DECIMAL_FLOAT_MODE_P (mode))
45608 return default_decimal_float_supported_p ();
45609 else if (mode == TFmode)
45610 return true;
45611 else
45612 return default_scalar_mode_supported_p (mode);
45615 /* Implements target hook vector_mode_supported_p. */
45616 static bool
45617 ix86_vector_mode_supported_p (machine_mode mode)
45619 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45620 return true;
45621 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45622 return true;
45623 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45624 return true;
45625 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45626 return true;
45627 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45628 return true;
45629 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45630 return true;
45631 return false;
45634 /* Implement target hook libgcc_floating_mode_supported_p. */
45635 static bool
45636 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45638 switch (mode)
45640 case SFmode:
45641 case DFmode:
45642 case XFmode:
45643 return true;
45645 case TFmode:
45646 #ifdef IX86_NO_LIBGCC_TFMODE
45647 return false;
45648 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45649 return TARGET_LONG_DOUBLE_128;
45650 #else
45651 return true;
45652 #endif
45654 default:
45655 return false;
45659 /* Target hook for c_mode_for_suffix. */
45660 static machine_mode
45661 ix86_c_mode_for_suffix (char suffix)
45663 if (suffix == 'q')
45664 return TFmode;
45665 if (suffix == 'w')
45666 return XFmode;
45668 return VOIDmode;
45671 /* Worker function for TARGET_MD_ASM_ADJUST.
45673 We implement asm flag outputs, and maintain source compatibility
45674 with the old cc0-based compiler. */
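/* A flag output uses a constraint of the form "=@cc<cond>"; for instance,
   asm ("add %2, %0" : "+r" (dst), "=@ccc" (carry) : "r" (src))
   leaves the carry flag in CARRY without any setcc in the asm template.  */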
45676 static rtx_insn *
45677 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
45678 vec<const char *> &constraints,
45679 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
45681 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
45682 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
45684 bool saw_asm_flag = false;
45686 start_sequence ();
45687 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
45689 const char *con = constraints[i];
45690 if (strncmp (con, "=@cc", 4) != 0)
45691 continue;
45692 con += 4;
45693 if (strchr (con, ',') != NULL)
45695 error ("alternatives not allowed in asm flag output");
45696 continue;
45699 bool invert = false;
45700 if (con[0] == 'n')
45701 invert = true, con++;
45703 machine_mode mode = CCmode;
45704 rtx_code code = UNKNOWN;
45706 switch (con[0])
45708 case 'a':
45709 if (con[1] == 0)
45710 mode = CCAmode, code = EQ;
45711 else if (con[1] == 'e' && con[2] == 0)
45712 mode = CCCmode, code = EQ;
45713 break;
45714 case 'b':
45715 if (con[1] == 0)
45716 mode = CCCmode, code = EQ;
45717 else if (con[1] == 'e' && con[2] == 0)
45718 mode = CCAmode, code = NE;
45719 break;
45720 case 'c':
45721 if (con[1] == 0)
45722 mode = CCCmode, code = EQ;
45723 break;
45724 case 'e':
45725 if (con[1] == 0)
45726 mode = CCZmode, code = EQ;
45727 break;
45728 case 'g':
45729 if (con[1] == 0)
45730 mode = CCGCmode, code = GT;
45731 else if (con[1] == 'e' && con[2] == 0)
45732 mode = CCGCmode, code = GE;
45733 break;
45734 case 'l':
45735 if (con[1] == 0)
45736 mode = CCGCmode, code = LT;
45737 else if (con[1] == 'e' && con[2] == 0)
45738 mode = CCGCmode, code = LE;
45739 break;
45740 case 'o':
45741 if (con[1] == 0)
45742 mode = CCOmode, code = EQ;
45743 break;
45744 case 'p':
45745 if (con[1] == 0)
45746 mode = CCPmode, code = EQ;
45747 break;
45748 case 's':
45749 if (con[1] == 0)
45750 mode = CCSmode, code = EQ;
45751 break;
45752 case 'z':
45753 if (con[1] == 0)
45754 mode = CCZmode, code = EQ;
45755 break;
45757 if (code == UNKNOWN)
45759 error ("unknown asm flag output %qs", constraints[i]);
45760 continue;
45762 if (invert)
45763 code = reverse_condition (code);
45765 rtx dest = outputs[i];
45766 if (!saw_asm_flag)
45768 /* This is the first asm flag output. Here we put the flags
45769 register in as the real output and adjust the condition to
45770 allow it. */
45771 constraints[i] = "=Bf";
45772 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
45773 saw_asm_flag = true;
45775 else
45777 /* We don't need the flags register as output twice. */
45778 constraints[i] = "=X";
45779 outputs[i] = gen_rtx_SCRATCH (SImode);
45782 rtx x = gen_rtx_REG (mode, FLAGS_REG);
45783 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
45785 machine_mode dest_mode = GET_MODE (dest);
45786 if (!SCALAR_INT_MODE_P (dest_mode))
45788 error ("invalid type for asm flag output");
45789 continue;
45792 if (dest_mode == DImode && !TARGET_64BIT)
45793 dest_mode = SImode;
45795 if (dest_mode != QImode)
45797 rtx destqi = gen_reg_rtx (QImode);
45798 emit_insn (gen_rtx_SET (destqi, x));
45800 if (TARGET_ZERO_EXTEND_WITH_AND
45801 && optimize_function_for_speed_p (cfun))
45803 x = force_reg (dest_mode, const0_rtx);
45805 emit_insn (gen_movstrictqi
45806 (gen_lowpart (QImode, x), destqi));
45808 else
45809 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
45812 if (dest_mode != GET_MODE (dest))
45814 rtx tmp = gen_reg_rtx (SImode);
45816 emit_insn (gen_rtx_SET (tmp, x));
45817 emit_insn (gen_zero_extendsidi2 (dest, tmp));
45819 else
45820 emit_insn (gen_rtx_SET (dest, x));
45822 rtx_insn *seq = get_insns ();
45823 end_sequence ();
45825 if (saw_asm_flag)
45826 return seq;
45827 else
45829 /* If we had no asm flag outputs, clobber the flags. */
45830 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
45831 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
45832 return NULL;
45836 /* Implements target vector targetm.asm.encode_section_info. */
45838 static void ATTRIBUTE_UNUSED
45839 ix86_encode_section_info (tree decl, rtx rtl, int first)
45841 default_encode_section_info (decl, rtl, first);
45843 if (ix86_in_large_data_p (decl))
45844 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45847 /* Worker function for REVERSE_CONDITION. */
45849 enum rtx_code
45850 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45852 return (mode != CCFPmode && mode != CCFPUmode
45853 ? reverse_condition (code)
45854 : reverse_condition_maybe_unordered (code));
45857 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45858 to OPERANDS[0]. */
45860 const char *
45861 output_387_reg_move (rtx insn, rtx *operands)
45863 if (REG_P (operands[0]))
45865 if (REG_P (operands[1])
45866 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45868 if (REGNO (operands[0]) == FIRST_STACK_REG)
45869 return output_387_ffreep (operands, 0);
45870 return "fstp\t%y0";
45872 if (STACK_TOP_P (operands[0]))
45873 return "fld%Z1\t%y1";
45874 return "fst\t%y0";
45876 else if (MEM_P (operands[0]))
45878 gcc_assert (REG_P (operands[1]));
45879 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45880 return "fstp%Z0\t%y0";
45881 else
45883 /* There is no non-popping store to memory for XFmode.
45884 So if we need one, follow the store with a load. */
45885 if (GET_MODE (operands[0]) == XFmode)
45886 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45887 else
45888 return "fst%Z0\t%y0";
45891 else
45892 gcc_unreachable();
45895 /* Output code to perform a conditional jump to LABEL, if C2 flag in
45896 FP status register is set. */
45898 void
45899 ix86_emit_fp_unordered_jump (rtx label)
45901 rtx reg = gen_reg_rtx (HImode);
45902 rtx temp;
45904 emit_insn (gen_x86_fnstsw_1 (reg));
45906 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45908 emit_insn (gen_x86_sahf_1 (reg));
45910 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45911 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45913 else
45915 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45917 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45918 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45921 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45922 gen_rtx_LABEL_REF (VOIDmode, label),
45923 pc_rtx);
45924 temp = gen_rtx_SET (pc_rtx, temp);
45926 emit_jump_insn (temp);
45927 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45930 /* Output code to perform a log1p XFmode calculation. */
45932 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45934 rtx_code_label *label1 = gen_label_rtx ();
45935 rtx_code_label *label2 = gen_label_rtx ();
45937 rtx tmp = gen_reg_rtx (XFmode);
45938 rtx tmp2 = gen_reg_rtx (XFmode);
45939 rtx test;
45941 emit_insn (gen_absxf2 (tmp, op1));
45942 test = gen_rtx_GE (VOIDmode, tmp,
45943 CONST_DOUBLE_FROM_REAL_VALUE (
45944 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45945 XFmode));
45946 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
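/* The threshold above is 1 - sqrt(2)/2, the largest magnitude for which
   fyl2xp1 is specified; at or above it the code branches to label1, forms
   1 + op1 explicitly and uses fyl2x instead.  */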
45948 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45949 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45950 emit_jump (label2);
45952 emit_label (label1);
45953 emit_move_insn (tmp, CONST1_RTX (XFmode));
45954 emit_insn (gen_addxf3 (tmp, op1, tmp));
45955 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45956 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45958 emit_label (label2);
45961 /* Emit code for round calculation. */
45962 void ix86_emit_i387_round (rtx op0, rtx op1)
45964 machine_mode inmode = GET_MODE (op1);
45965 machine_mode outmode = GET_MODE (op0);
45966 rtx e1, e2, res, tmp, tmp1, half;
45967 rtx scratch = gen_reg_rtx (HImode);
45968 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45969 rtx_code_label *jump_label = gen_label_rtx ();
45970 rtx insn;
45971 rtx (*gen_abs) (rtx, rtx);
45972 rtx (*gen_neg) (rtx, rtx);
45974 switch (inmode)
45976 case SFmode:
45977 gen_abs = gen_abssf2;
45978 break;
45979 case DFmode:
45980 gen_abs = gen_absdf2;
45981 break;
45982 case XFmode:
45983 gen_abs = gen_absxf2;
45984 break;
45985 default:
45986 gcc_unreachable ();
45989 switch (outmode)
45991 case SFmode:
45992 gen_neg = gen_negsf2;
45993 break;
45994 case DFmode:
45995 gen_neg = gen_negdf2;
45996 break;
45997 case XFmode:
45998 gen_neg = gen_negxf2;
45999 break;
46000 case HImode:
46001 gen_neg = gen_neghi2;
46002 break;
46003 case SImode:
46004 gen_neg = gen_negsi2;
46005 break;
46006 case DImode:
46007 gen_neg = gen_negdi2;
46008 break;
46009 default:
46010 gcc_unreachable ();
46013 e1 = gen_reg_rtx (inmode);
46014 e2 = gen_reg_rtx (inmode);
46015 res = gen_reg_rtx (outmode);
46017 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
46019 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
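/* For example, op1 = -2.5: fabs gives 2.5, adding 0.5 gives 3.0, floor
   gives 3, and the sign recorded by fxam negates the result to -3; halfway
   cases therefore round away from zero.  */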
46021 /* scratch = fxam(op1) */
46022 emit_insn (gen_rtx_SET (scratch,
46023 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
46024 UNSPEC_FXAM)));
46025 /* e1 = fabs(op1) */
46026 emit_insn (gen_abs (e1, op1));
46028 /* e2 = e1 + 0.5 */
46029 half = force_reg (inmode, half);
46030 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
46032 /* res = floor(e2) */
46033 if (inmode != XFmode)
46035 tmp1 = gen_reg_rtx (XFmode);
46037 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
46039 else
46040 tmp1 = e2;
46042 switch (outmode)
46044 case SFmode:
46045 case DFmode:
46047 rtx tmp0 = gen_reg_rtx (XFmode);
46049 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
46051 emit_insn (gen_rtx_SET (res,
46052 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
46053 UNSPEC_TRUNC_NOOP)));
46055 break;
46056 case XFmode:
46057 emit_insn (gen_frndintxf2_floor (res, tmp1));
46058 break;
46059 case HImode:
46060 emit_insn (gen_lfloorxfhi2 (res, tmp1));
46061 break;
46062 case SImode:
46063 emit_insn (gen_lfloorxfsi2 (res, tmp1));
46064 break;
46065 case DImode:
46066 emit_insn (gen_lfloorxfdi2 (res, tmp1));
46067 break;
46068 default:
46069 gcc_unreachable ();
46072 /* flags = signbit(a) */
46073 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
46075 /* if (flags) then res = -res */
46076 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
46077 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
46078 gen_rtx_LABEL_REF (VOIDmode, jump_label),
46079 pc_rtx);
46080 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46081 predict_jump (REG_BR_PROB_BASE * 50 / 100);
46082 JUMP_LABEL (insn) = jump_label;
46084 emit_insn (gen_neg (res, res));
46086 emit_label (jump_label);
46087 LABEL_NUSES (jump_label) = 1;
46089 emit_move_insn (op0, res);
46092 /* Output code to perform a Newton-Raphson approximation of a single precision
46093 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
46095 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
46097 rtx x0, x1, e0, e1;
46099 x0 = gen_reg_rtx (mode);
46100 e0 = gen_reg_rtx (mode);
46101 e1 = gen_reg_rtx (mode);
46102 x1 = gen_reg_rtx (mode);
46104 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
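/* This is one Newton-Raphson step for the reciprocal, x1 = x0 * (2 - b * x0),
   written as (x0 + x0) - (b * x0 * x0) so the low-precision rcp estimate x0
   is refined to roughly full single precision before the multiply by a.  */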
46106 b = force_reg (mode, b);
46108 /* x0 = rcp(b) estimate */
46109 if (mode == V16SFmode || mode == V8DFmode)
46110 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46111 UNSPEC_RCP14)));
46112 else
46113 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46114 UNSPEC_RCP)));
46116 /* e0 = x0 * b */
46117 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
46119 /* e0 = x0 * e0 */
46120 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
46122 /* e1 = x0 + x0 */
46123 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
46125 /* x1 = e1 - e0 */
46126 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
46128 /* res = a * x1 */
46129 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
46132 /* Output code to perform a Newton-Raphson approximation of a
46133 single precision floating point [reciprocal] square root. */
46135 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
46136 bool recip)
46138 rtx x0, e0, e1, e2, e3, mthree, mhalf;
46139 REAL_VALUE_TYPE r;
46140 int unspec;
46142 x0 = gen_reg_rtx (mode);
46143 e0 = gen_reg_rtx (mode);
46144 e1 = gen_reg_rtx (mode);
46145 e2 = gen_reg_rtx (mode);
46146 e3 = gen_reg_rtx (mode);
46148 real_from_integer (&r, VOIDmode, -3, SIGNED);
46149 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46151 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
46152 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46153 unspec = UNSPEC_RSQRT;
46155 if (VECTOR_MODE_P (mode))
46157 mthree = ix86_build_const_vector (mode, true, mthree);
46158 mhalf = ix86_build_const_vector (mode, true, mhalf);
46159 /* There is no 512-bit rsqrt. There is however rsqrt14. */
46160 if (GET_MODE_SIZE (mode) == 64)
46161 unspec = UNSPEC_RSQRT14;
46164 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
46165 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
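/* Both forms are one Newton-Raphson step for 1/sqrt(a),
   x1 = 0.5 * x0 * (3 - a * x0 * x0), rearranged as
   -0.5 * x0 * (a * x0 * x0 - 3); multiplying once more by a turns the
   refined reciprocal square root into the square root itself.  */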
46167 a = force_reg (mode, a);
46169 /* x0 = rsqrt(a) estimate */
46170 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
46171 unspec)));
46173 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN result for sqrt(0.0). */
46174 if (!recip)
46176 rtx zero, mask;
46178 zero = gen_reg_rtx (mode);
46179 mask = gen_reg_rtx (mode);
46181 zero = force_reg (mode, CONST0_RTX(mode));
46183 /* Handle masked compare. */
46184 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
46186 mask = gen_reg_rtx (HImode);
46187 /* Imm value 0x4 corresponds to not-equal comparison. */
46188 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
46189 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
46191 else
46193 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
46195 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
46199 /* e0 = x0 * a */
46200 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
46201 /* e1 = e0 * x0 */
46202 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
46204 /* e2 = e1 - 3. */
46205 mthree = force_reg (mode, mthree);
46206 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
46208 mhalf = force_reg (mode, mhalf);
46209 if (recip)
46210 /* e3 = -.5 * x0 */
46211 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
46212 else
46213 /* e3 = -.5 * e0 */
46214 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
46215 /* ret = e2 * e3 */
46216 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
46219 #ifdef TARGET_SOLARIS
46220 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
46222 static void
46223 i386_solaris_elf_named_section (const char *name, unsigned int flags,
46224 tree decl)
46226 /* With Binutils 2.15, the "@unwind" marker must be specified on
46227 every occurrence of the ".eh_frame" section, not just the first
46228 one. */
46229 if (TARGET_64BIT
46230 && strcmp (name, ".eh_frame") == 0)
46232 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
46233 flags & SECTION_WRITE ? "aw" : "a");
46234 return;
46237 #ifndef USE_GAS
46238 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
46240 solaris_elf_asm_comdat_section (name, flags, decl);
46241 return;
46243 #endif
46245 default_elf_asm_named_section (name, flags, decl);
46247 #endif /* TARGET_SOLARIS */
46249 /* Return the mangling of TYPE if it is an extended fundamental type. */
46251 static const char *
46252 ix86_mangle_type (const_tree type)
46254 type = TYPE_MAIN_VARIANT (type);
46256 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
46257 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
46258 return NULL;
46260 switch (TYPE_MODE (type))
46262 case TFmode:
46263 /* __float128 is "g". */
46264 return "g";
46265 case XFmode:
46266 /* "long double" or __float80 is "e". */
46267 return "e";
46268 default:
46269 return NULL;
46273 /* For 32-bit code we can save PIC register setup by using
46274 __stack_chk_fail_local hidden function instead of calling
46275 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
46276 register, so it is better to call __stack_chk_fail directly. */
46278 static tree ATTRIBUTE_UNUSED
46279 ix86_stack_protect_fail (void)
46281 return TARGET_64BIT
46282 ? default_external_stack_protect_fail ()
46283 : default_hidden_stack_protect_fail ();
46286 /* Select a format to encode pointers in exception handling data. CODE
46287 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
46288 true if the symbol may be affected by dynamic relocations.
46290 ??? All x86 object file formats are capable of representing this.
46291 After all, the relocation needed is the same as for the call insn.
46292 Whether or not a particular assembler allows us to enter such, I
46293 guess we'll have to see. */
46295 asm_preferred_eh_data_format (int code, int global)
46297 if (flag_pic)
46299 int type = DW_EH_PE_sdata8;
46300 if (!TARGET_64BIT
46301 || ix86_cmodel == CM_SMALL_PIC
46302 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46303 type = DW_EH_PE_sdata4;
46304 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46306 if (ix86_cmodel == CM_SMALL
46307 || (ix86_cmodel == CM_MEDIUM && code))
46308 return DW_EH_PE_udata4;
46309 return DW_EH_PE_absptr;
46312 /* Expand copysign from SIGN to the positive value ABS_VALUE
46313 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
46314 the sign-bit. */
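/* In the callers below MASK is the inverted sign-bit mask produced by
   ix86_expand_sse_fabs; it is negated again here so that only the sign
   bit of SIGN is extracted and ORed into ABS_VALUE.  */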
46315 static void
46316 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46318 machine_mode mode = GET_MODE (sign);
46319 rtx sgn = gen_reg_rtx (mode);
46320 if (mask == NULL_RTX)
46322 machine_mode vmode;
46324 if (mode == SFmode)
46325 vmode = V4SFmode;
46326 else if (mode == DFmode)
46327 vmode = V2DFmode;
46328 else
46329 vmode = mode;
46331 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46332 if (!VECTOR_MODE_P (mode))
46334 /* We need to generate a scalar mode mask in this case. */
46335 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46336 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46337 mask = gen_reg_rtx (mode);
46338 emit_insn (gen_rtx_SET (mask, tmp));
46341 else
46342 mask = gen_rtx_NOT (mode, mask);
46343 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
46344 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
46347 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46348 mask for masking out the sign-bit is stored in *SMASK, if that is
46349 non-null. */
46350 static rtx
46351 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46353 machine_mode vmode, mode = GET_MODE (op0);
46354 rtx xa, mask;
46356 xa = gen_reg_rtx (mode);
46357 if (mode == SFmode)
46358 vmode = V4SFmode;
46359 else if (mode == DFmode)
46360 vmode = V2DFmode;
46361 else
46362 vmode = mode;
46363 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46364 if (!VECTOR_MODE_P (mode))
46366 /* We need to generate a scalar mode mask in this case. */
46367 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46368 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46369 mask = gen_reg_rtx (mode);
46370 emit_insn (gen_rtx_SET (mask, tmp));
46372 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
46374 if (smask)
46375 *smask = mask;
46377 return xa;
46380 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46381 swapping the operands if SWAP_OPERANDS is true. The expanded
46382 code is a forward jump to a newly created label in case the
46383 comparison is true. The generated label rtx is returned. */
46384 static rtx_code_label *
46385 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46386 bool swap_operands)
46388 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46389 rtx_code_label *label;
46390 rtx tmp;
46392 if (swap_operands)
46393 std::swap (op0, op1);
46395 label = gen_label_rtx ();
46396 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46397 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46398 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46399 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46400 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46401 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46402 JUMP_LABEL (tmp) = label;
46404 return label;
46407 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46408 using comparison code CODE. Operands are swapped for the comparison if
46409 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46410 static rtx
46411 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46412 bool swap_operands)
46414 rtx (*insn)(rtx, rtx, rtx, rtx);
46415 machine_mode mode = GET_MODE (op0);
46416 rtx mask = gen_reg_rtx (mode);
46418 if (swap_operands)
46419 std::swap (op0, op1);
46421 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46423 emit_insn (insn (mask, op0, op1,
46424 gen_rtx_fmt_ee (code, mode, op0, op1)));
46425 return mask;
46428 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46429 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
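/* For 0 <= x < 2**n the sum x + 2**n has its least significant mantissa
   bit at weight 1, so computing (x + 2**n) - 2**n rounds x to an integer
   in the current rounding mode and the subtraction is exact.  The
   expanders below rely on this and filter out inputs with |x| >= 2**n
   (which are already integers) beforehand.  */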
46430 static rtx
46431 ix86_gen_TWO52 (machine_mode mode)
46433 REAL_VALUE_TYPE TWO52r;
46434 rtx TWO52;
46436 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46437 TWO52 = const_double_from_real_value (TWO52r, mode);
46438 TWO52 = force_reg (mode, TWO52);
46440 return TWO52;
46443 /* Expand SSE sequence for computing lround from OP1 storing
46444 into OP0. */
46445 void
46446 ix86_expand_lround (rtx op0, rtx op1)
46448 /* C code for the stuff we're doing below:
46449 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46450 return (long)tmp;
46452 machine_mode mode = GET_MODE (op1);
46453 const struct real_format *fmt;
46454 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46455 rtx adj;
46457 /* load nextafter (0.5, 0.0) */
46458 fmt = REAL_MODE_FORMAT (mode);
46459 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46460 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
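   /* Using the largest representable value below 0.5 instead of 0.5
      itself keeps op1 + adj from being rounded up across an integer
      boundary: for the value just below 0.5, adding exactly 0.5 would
      round the sum up to 1.0 and lround would wrongly return 1.  With
      the predecessor of 0.5 the sum stays below 1.0 in that case, while
      op1 == 0.5 still reaches 1.0, giving round-half-away-from-zero
      behaviour.  */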
46462 /* adj = copysign (0.5, op1) */
46463 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46464 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46466 /* adj = op1 + adj */
46467 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46469 /* op0 = (imode)adj */
46470 expand_fix (op0, adj, 0);
46473 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
46474 into OPERAND0. */
46475 void
46476 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46478 /* C code for the stuff we're doing below (for do_floor):
46479 xi = (long)op1;
46480 xi -= (double)xi > op1 ? 1 : 0;
46481 return xi;
46483 machine_mode fmode = GET_MODE (op1);
46484 machine_mode imode = GET_MODE (op0);
46485 rtx ireg, freg, tmp;
46486 rtx_code_label *label;
46488 /* reg = (long)op1 */
46489 ireg = gen_reg_rtx (imode);
46490 expand_fix (ireg, op1, 0);
46492 /* freg = (double)reg */
46493 freg = gen_reg_rtx (fmode);
46494 expand_float (freg, ireg, 0);
46496 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46497 label = ix86_expand_sse_compare_and_jump (UNLE,
46498 freg, op1, !do_floor);
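   /* The jump is taken when the UNLE comparison holds, skipping the
      adjustment: for floor we decrement only when (double) ireg > op1,
      and for ceil (operands swapped above) we increment only when
      (double) ireg < op1.  */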
46499 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46500 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46501 emit_move_insn (ireg, tmp);
46503 emit_label (label);
46504 LABEL_NUSES (label) = 1;
46506 emit_move_insn (op0, ireg);
46509 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46510 result in OPERAND0. */
46511 void
46512 ix86_expand_rint (rtx operand0, rtx operand1)
46514 /* C code for the stuff we're doing below:
46515 xa = fabs (operand1);
46516 if (!isless (xa, 2**52))
46517 return operand1;
46518 xa = xa + 2**52 - 2**52;
46519 return copysign (xa, operand1);
46521 machine_mode mode = GET_MODE (operand0);
46522 rtx res, xa, TWO52, mask;
46523 rtx_code_label *label;
46525 res = gen_reg_rtx (mode);
46526 emit_move_insn (res, operand1);
46528 /* xa = abs (operand1) */
46529 xa = ix86_expand_sse_fabs (res, &mask);
46531 /* if (!isless (xa, TWO52)) goto label; */
46532 TWO52 = ix86_gen_TWO52 (mode);
46533 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46535 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46536 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46538 ix86_sse_copysign_to_positive (res, xa, res, mask);
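   /* Operating on fabs (operand1) and copying the sign back here keeps
      the sign of the result correct for negative inputs, including
      -0.0 and values that round to zero.  */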
46540 emit_label (label);
46541 LABEL_NUSES (label) = 1;
46543 emit_move_insn (operand0, res);
46546 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46547 into OPERAND0. */
46548 void
46549 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46551 /* C code for the stuff we expand below.
46552 double xa = fabs (x), x2;
46553 if (!isless (xa, TWO52))
46554 return x;
46555 xa = xa + TWO52 - TWO52;
46556 x2 = copysign (xa, x);
46557 Compensate. Floor:
46558 if (x2 > x)
46559 x2 -= 1;
46560 Compensate. Ceil:
46561 if (x2 < x)
46562 x2 -= -1;
46563 return x2;
46565 machine_mode mode = GET_MODE (operand0);
46566 rtx xa, TWO52, tmp, one, res, mask;
46567 rtx_code_label *label;
46569 TWO52 = ix86_gen_TWO52 (mode);
46571 /* Temporary for holding the result, initialized to the input
46572 operand to ease control flow. */
46573 res = gen_reg_rtx (mode);
46574 emit_move_insn (res, operand1);
46576 /* xa = abs (operand1) */
46577 xa = ix86_expand_sse_fabs (res, &mask);
46579 /* if (!isless (xa, TWO52)) goto label; */
46580 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46582 /* xa = xa + TWO52 - TWO52; */
46583 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46584 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46586 /* xa = copysign (xa, operand1) */
46587 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46589 /* generate 1.0 or -1.0 */
46590 one = force_reg (mode,
46591 const_double_from_real_value (do_floor
46592 ? dconst1 : dconstm1, mode));
46594 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46595 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46596 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
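   /* The comparison mask is all-ones or all-zeros per element, so ANDing
      it with 1.0 (floor) or -1.0 (ceil) yields exactly the compensation
      value: 0.0 when no adjustment is needed, +/-1.0 when the dirty
      rounding landed on the wrong side of the operand.  */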
46597 /* We always need to subtract here to preserve signed zero. */
46598 tmp = expand_simple_binop (mode, MINUS,
46599 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46600 emit_move_insn (res, tmp);
46602 emit_label (label);
46603 LABEL_NUSES (label) = 1;
46605 emit_move_insn (operand0, res);
46608 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46609 into OPERAND0. */
46610 void
46611 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46613 /* C code for the stuff we expand below.
46614 double xa = fabs (x), x2;
46615 if (!isless (xa, TWO52))
46616 return x;
46617 x2 = (double)(long)x;
46618 Compensate. Floor:
46619 if (x2 > x)
46620 x2 -= 1;
46621 Compensate. Ceil:
46622 if (x2 < x)
46623 x2 += 1;
46624 if (HONOR_SIGNED_ZEROS (mode))
46625 return copysign (x2, x);
46626 return x2;
46628 machine_mode mode = GET_MODE (operand0);
46629 rtx xa, xi, TWO52, tmp, one, res, mask;
46630 rtx_code_label *label;
46632 TWO52 = ix86_gen_TWO52 (mode);
46634 /* Temporary for holding the result, initialized to the input
46635 operand to ease control flow. */
46636 res = gen_reg_rtx (mode);
46637 emit_move_insn (res, operand1);
46639 /* xa = abs (operand1) */
46640 xa = ix86_expand_sse_fabs (res, &mask);
46642 /* if (!isless (xa, TWO52)) goto label; */
46643 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46645 /* xa = (double)(long)x */
46646 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46647 expand_fix (xi, res, 0);
46648 expand_float (xa, xi, 0);
46650 /* generate 1.0 */
46651 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46653 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46654 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46655 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46656 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46657 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46658 emit_move_insn (res, tmp);
46660 if (HONOR_SIGNED_ZEROS (mode))
46661 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46663 emit_label (label);
46664 LABEL_NUSES (label) = 1;
46666 emit_move_insn (operand0, res);
46669 /* Expand SSE sequence for computing round from OPERAND1 storing
46670 into OPERAND0. This sequence works without relying on DImode truncation
46671 via cvttsd2siq, which is only available on 64-bit targets. */
46672 void
46673 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46675 /* C code for the stuff we expand below.
46676 double xa = fabs (x), xa2, x2;
46677 if (!isless (xa, TWO52))
46678 return x;
46679 Using the absolute value and copying back sign makes
46680 -0.0 -> -0.0 correct.
46681 xa2 = xa + TWO52 - TWO52;
46682 Compensate.
46683 dxa = xa2 - xa;
46684 if (dxa <= -0.5)
46685 xa2 += 1;
46686 else if (dxa > 0.5)
46687 xa2 -= 1;
46688 x2 = copysign (xa2, x);
46689 return x2;
46691 machine_mode mode = GET_MODE (operand0);
46692 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46693 rtx_code_label *label;
46695 TWO52 = ix86_gen_TWO52 (mode);
46697 /* Temporary for holding the result, initialized to the input
46698 operand to ease control flow. */
46699 res = gen_reg_rtx (mode);
46700 emit_move_insn (res, operand1);
46702 /* xa = abs (operand1) */
46703 xa = ix86_expand_sse_fabs (res, &mask);
46705 /* if (!isless (xa, TWO52)) goto label; */
46706 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46708 /* xa2 = xa + TWO52 - TWO52; */
46709 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46710 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46712 /* dxa = xa2 - xa; */
46713 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
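   /* The dirty trick above rounds xa to the nearest integer with ties
      going to even (in the default rounding mode), while round() must
      round halfway cases away from zero.  dxa is the signed rounding
      error, in [-0.5, 0.5]: bump xa2 up when dxa <= -0.5 (a tie that was
      rounded down) and down when dxa > 0.5, which cannot happen for a
      correctly rounded xa2 and so is effectively a safety net.  */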
46715 /* generate 0.5, 1.0 and -0.5 */
46716 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46717 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46718 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46719 0, OPTAB_DIRECT);
46721 /* Compensate. */
46722 tmp = gen_reg_rtx (mode);
46723 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46724 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46725 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46726 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46727 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46728 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46729 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46730 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46732 /* res = copysign (xa2, operand1) */
46733 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46735 emit_label (label);
46736 LABEL_NUSES (label) = 1;
46738 emit_move_insn (operand0, res);
46741 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46742 into OPERAND0. */
46743 void
46744 ix86_expand_trunc (rtx operand0, rtx operand1)
46746 /* C code for SSE variant we expand below.
46747 double xa = fabs (x), x2;
46748 if (!isless (xa, TWO52))
46749 return x;
46750 x2 = (double)(long)x;
46751 if (HONOR_SIGNED_ZEROS (mode))
46752 return copysign (x2, x);
46753 return x2;
46755 machine_mode mode = GET_MODE (operand0);
46756 rtx xa, xi, TWO52, res, mask;
46757 rtx_code_label *label;
46759 TWO52 = ix86_gen_TWO52 (mode);
46761 /* Temporary for holding the result, initialized to the input
46762 operand to ease control flow. */
46763 res = gen_reg_rtx (mode);
46764 emit_move_insn (res, operand1);
46766 /* xa = abs (operand1) */
46767 xa = ix86_expand_sse_fabs (res, &mask);
46769 /* if (!isless (xa, TWO52)) goto label; */
46770 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46772 /* x = (double)(long)x */
46773 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46774 expand_fix (xi, res, 0);
46775 expand_float (res, xi, 0);
46777 if (HONOR_SIGNED_ZEROS (mode))
46778 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46780 emit_label (label);
46781 LABEL_NUSES (label) = 1;
46783 emit_move_insn (operand0, res);
46786 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46787 into OPERAND0. */
46788 void
46789 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46791 machine_mode mode = GET_MODE (operand0);
46792 rtx xa, mask, TWO52, one, res, smask, tmp;
46793 rtx_code_label *label;
46795 /* C code for SSE variant we expand below.
46796 double xa = fabs (x), x2;
46797 if (!isless (xa, TWO52))
46798 return x;
46799 xa2 = xa + TWO52 - TWO52;
46800 Compensate:
46801 if (xa2 > xa)
46802 xa2 -= 1.0;
46803 x2 = copysign (xa2, x);
46804 return x2;
46807 TWO52 = ix86_gen_TWO52 (mode);
46809 /* Temporary for holding the result, initialized to the input
46810 operand to ease control flow. */
46811 res = gen_reg_rtx (mode);
46812 emit_move_insn (res, operand1);
46814 /* xa = abs (operand1) */
46815 xa = ix86_expand_sse_fabs (res, &smask);
46817 /* if (!isless (xa, TWO52)) goto label; */
46818 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46820 /* res = xa + TWO52 - TWO52; */
46821 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46822 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46823 emit_move_insn (res, tmp);
46825 /* generate 1.0 */
46826 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46828 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46829 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46830 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
46831 tmp = expand_simple_binop (mode, MINUS,
46832 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46833 emit_move_insn (res, tmp);
46835 /* res = copysign (res, operand1) */
46836 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46838 emit_label (label);
46839 LABEL_NUSES (label) = 1;
46841 emit_move_insn (operand0, res);
46844 /* Expand SSE sequence for computing round from OPERAND1 storing
46845 into OPERAND0. */
46846 void
46847 ix86_expand_round (rtx operand0, rtx operand1)
46849 /* C code for the stuff we're doing below:
46850 double xa = fabs (x);
46851 if (!isless (xa, TWO52))
46852 return x;
46853 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46854 return copysign (xa, x);
46856 machine_mode mode = GET_MODE (operand0);
46857 rtx res, TWO52, xa, xi, half, mask;
46858 rtx_code_label *label;
46859 const struct real_format *fmt;
46860 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46862 /* Temporary for holding the result, initialized to the input
46863 operand to ease control flow. */
46864 res = gen_reg_rtx (mode);
46865 emit_move_insn (res, operand1);
46867 TWO52 = ix86_gen_TWO52 (mode);
46868 xa = ix86_expand_sse_fabs (res, &mask);
46869 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46871 /* load nextafter (0.5, 0.0) */
46872 fmt = REAL_MODE_FORMAT (mode);
46873 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46874 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46876 /* xa = xa + 0.5 */
46877 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46878 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46880 /* xa = (double)(int64_t)xa */
46881 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46882 expand_fix (xi, xa, 0);
46883 expand_float (xa, xi, 0);
46885 /* res = copysign (xa, operand1) */
46886 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46888 emit_label (label);
46889 LABEL_NUSES (label) = 1;
46891 emit_move_insn (operand0, res);
46894 /* Expand SSE sequence for computing round
46895 from OP1 storing into OP0 using sse4 round insn. */
46896 void
46897 ix86_expand_round_sse4 (rtx op0, rtx op1)
46899 machine_mode mode = GET_MODE (op0);
46900 rtx e1, e2, res, half;
46901 const struct real_format *fmt;
46902 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46903 rtx (*gen_copysign) (rtx, rtx, rtx);
46904 rtx (*gen_round) (rtx, rtx, rtx);
46906 switch (mode)
46908 case SFmode:
46909 gen_copysign = gen_copysignsf3;
46910 gen_round = gen_sse4_1_roundsf2;
46911 break;
46912 case DFmode:
46913 gen_copysign = gen_copysigndf3;
46914 gen_round = gen_sse4_1_rounddf2;
46915 break;
46916 default:
46917 gcc_unreachable ();
46920 /* round (a) = trunc (a + copysign (0.5, a)) */
46922 /* load nextafter (0.5, 0.0) */
46923 fmt = REAL_MODE_FORMAT (mode);
46924 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46925 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46926 half = const_double_from_real_value (pred_half, mode);
46928 /* e1 = copysign (0.5, op1) */
46929 e1 = gen_reg_rtx (mode);
46930 emit_insn (gen_copysign (e1, half, op1));
46932 /* e2 = op1 + e1 */
46933 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46935 /* res = trunc (e2) */
46936 res = gen_reg_rtx (mode);
46937 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46939 emit_move_insn (op0, res);
46943 /* Table of valid machine attributes. */
46944 static const struct attribute_spec ix86_attribute_table[] =
46946 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46947 affects_type_identity } */
46948 /* Stdcall attribute says callee is responsible for popping arguments
46949 if they are not variable. */
46950 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46951 true },
46952 /* Fastcall attribute says callee is responsible for popping arguments
46953 if they are not variable. */
46954 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46955 true },
46956 /* Thiscall attribute says callee is responsible for popping arguments
46957 if they are not variable. */
46958 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46959 true },
46960 /* Cdecl attribute says the callee is a normal C declaration */
46961 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46962 true },
46963 /* Regparm attribute specifies how many integer arguments are to be
46964 passed in registers. */
46965 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
46966 true },
46967 /* Sseregparm attribute says we are using x86_64 calling conventions
46968 for FP arguments. */
46969 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46970 true },
46971 /* The transactional memory builtins are implicitly regparm or fastcall
46972 depending on the ABI. Override the generic do-nothing attribute that
46973 these builtins were declared with. */
46974 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46975 true },
46976 /* force_align_arg_pointer says this function realigns the stack at entry. */
46977 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46978 false, true, true, ix86_handle_cconv_attribute, false },
46979 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46980 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46981 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46982 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
46983 false },
46984 #endif
46985 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46986 false },
46987 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
46988 false },
46989 #ifdef SUBTARGET_ATTRIBUTE_TABLE
46990 SUBTARGET_ATTRIBUTE_TABLE,
46991 #endif
46992 /* ms_abi and sysv_abi calling convention function attributes. */
46993 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46994 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46995 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46996 false },
46997 { "callee_pop_aggregate_return", 1, 1, false, true, true,
46998 ix86_handle_callee_pop_aggregate_return, true },
46999 /* End element. */
47000 { NULL, 0, 0, false, false, false, NULL, false }
47003 /* Implement targetm.vectorize.builtin_vectorization_cost. */
47004 static int
47005 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
47006 tree vectype, int)
47008 unsigned elements;
47010 switch (type_of_cost)
47012 case scalar_stmt:
47013 return ix86_cost->scalar_stmt_cost;
47015 case scalar_load:
47016 return ix86_cost->scalar_load_cost;
47018 case scalar_store:
47019 return ix86_cost->scalar_store_cost;
47021 case vector_stmt:
47022 return ix86_cost->vec_stmt_cost;
47024 case vector_load:
47025 return ix86_cost->vec_align_load_cost;
47027 case vector_store:
47028 return ix86_cost->vec_store_cost;
47030 case vec_to_scalar:
47031 return ix86_cost->vec_to_scalar_cost;
47033 case scalar_to_vec:
47034 return ix86_cost->scalar_to_vec_cost;
47036 case unaligned_load:
47037 case unaligned_store:
47038 return ix86_cost->vec_unalign_load_cost;
47040 case cond_branch_taken:
47041 return ix86_cost->cond_taken_branch_cost;
47043 case cond_branch_not_taken:
47044 return ix86_cost->cond_not_taken_branch_cost;
47046 case vec_perm:
47047 case vec_promote_demote:
47048 return ix86_cost->vec_stmt_cost;
47050 case vec_construct:
47051 elements = TYPE_VECTOR_SUBPARTS (vectype);
47052 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
47054 default:
47055 gcc_unreachable ();
47059 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
47060 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
47061 insn every time. */
47063 static GTY(()) rtx_insn *vselect_insn;
47065 /* Initialize vselect_insn. */
47067 static void
47068 init_vselect_insn (void)
47070 unsigned i;
47071 rtx x;
47073 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
47074 for (i = 0; i < MAX_VECT_LEN; ++i)
47075 XVECEXP (x, 0, i) = const0_rtx;
47076 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
47077 const0_rtx), x);
47078 x = gen_rtx_SET (const0_rtx, x);
47079 start_sequence ();
47080 vselect_insn = emit_insn (x);
47081 end_sequence ();
47084 /* Construct (set target (vec_select op0 (parallel perm))) and
47085 return true if that's a valid instruction in the active ISA. */
47087 static bool
47088 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
47089 unsigned nelt, bool testing_p)
47091 unsigned int i;
47092 rtx x, save_vconcat;
47093 int icode;
47095 if (vselect_insn == NULL_RTX)
47096 init_vselect_insn ();
47098 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
47099 PUT_NUM_ELEM (XVEC (x, 0), nelt);
47100 for (i = 0; i < nelt; ++i)
47101 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
47102 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47103 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
47104 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
47105 SET_DEST (PATTERN (vselect_insn)) = target;
47106 icode = recog_memoized (vselect_insn);
47108 if (icode >= 0 && !testing_p)
47109 emit_insn (copy_rtx (PATTERN (vselect_insn)));
47111 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
47112 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
47113 INSN_CODE (vselect_insn) = -1;
47115 return icode >= 0;
47118 /* Similar, but generate a vec_concat from op0 and op1 as well. */
47120 static bool
47121 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
47122 const unsigned char *perm, unsigned nelt,
47123 bool testing_p)
47125 machine_mode v2mode;
47126 rtx x;
47127 bool ok;
47129 if (vselect_insn == NULL_RTX)
47130 init_vselect_insn ();
47132 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
47133 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47134 PUT_MODE (x, v2mode);
47135 XEXP (x, 0) = op0;
47136 XEXP (x, 1) = op1;
47137 ok = expand_vselect (target, x, perm, nelt, testing_p);
47138 XEXP (x, 0) = const0_rtx;
47139 XEXP (x, 1) = const0_rtx;
47140 return ok;
47143 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47144 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
47146 static bool
47147 expand_vec_perm_blend (struct expand_vec_perm_d *d)
47149 machine_mode mmode, vmode = d->vmode;
47150 unsigned i, mask, nelt = d->nelt;
47151 rtx target, op0, op1, maskop, x;
47152 rtx rperm[32], vperm;
47154 if (d->one_operand_p)
47155 return false;
47156 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
47157 && (TARGET_AVX512BW
47158 || GET_MODE_UNIT_SIZE (vmode) >= 4))
47160 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47162 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47164 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47166 else
47167 return false;
47169 /* This is a blend, not a permute. Elements must stay in their
47170 respective lanes. */
47171 for (i = 0; i < nelt; ++i)
47173 unsigned e = d->perm[i];
47174 if (!(e == i || e == i + nelt))
47175 return false;
47178 if (d->testing_p)
47179 return true;
47181 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
47182 decision should be extracted elsewhere, so that we only try that
47183 sequence once all budget==3 options have been tried. */
47184 target = d->target;
47185 op0 = d->op0;
47186 op1 = d->op1;
47187 mask = 0;
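   /* The blend mask constructed below has one bit per element of the
      mode the blend insn actually operates on; a set bit selects the
      element from op1.  Element modes without a direct blend pattern
      (e.g. V2DI, V4SI) are recast to a narrower element mode, with each
      selector bit replicated across the corresponding sub-elements.  */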
47189 switch (vmode)
47191 case V8DFmode:
47192 case V16SFmode:
47193 case V4DFmode:
47194 case V8SFmode:
47195 case V2DFmode:
47196 case V4SFmode:
47197 case V8HImode:
47198 case V8SImode:
47199 case V32HImode:
47200 case V64QImode:
47201 case V16SImode:
47202 case V8DImode:
47203 for (i = 0; i < nelt; ++i)
47204 mask |= (d->perm[i] >= nelt) << i;
47205 break;
47207 case V2DImode:
47208 for (i = 0; i < 2; ++i)
47209 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
47210 vmode = V8HImode;
47211 goto do_subreg;
47213 case V4SImode:
47214 for (i = 0; i < 4; ++i)
47215 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47216 vmode = V8HImode;
47217 goto do_subreg;
47219 case V16QImode:
47220 /* See if bytes move in pairs so we can use pblendw with
47221 an immediate argument, rather than pblendvb with a vector
47222 argument. */
47223 for (i = 0; i < 16; i += 2)
47224 if (d->perm[i] + 1 != d->perm[i + 1])
47226 use_pblendvb:
47227 for (i = 0; i < nelt; ++i)
47228 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
47230 finish_pblendvb:
47231 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
47232 vperm = force_reg (vmode, vperm);
47234 if (GET_MODE_SIZE (vmode) == 16)
47235 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
47236 else
47237 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
47238 if (target != d->target)
47239 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47240 return true;
47243 for (i = 0; i < 8; ++i)
47244 mask |= (d->perm[i * 2] >= 16) << i;
47245 vmode = V8HImode;
47246 /* FALLTHRU */
47248 do_subreg:
47249 target = gen_reg_rtx (vmode);
47250 op0 = gen_lowpart (vmode, op0);
47251 op1 = gen_lowpart (vmode, op1);
47252 break;
47254 case V32QImode:
47255 /* See if bytes move in pairs. If not, vpblendvb must be used. */
47256 for (i = 0; i < 32; i += 2)
47257 if (d->perm[i] + 1 != d->perm[i + 1])
47258 goto use_pblendvb;
47259 /* See if bytes move in quadruplets. If yes, vpblendd
47260 with immediate can be used. */
47261 for (i = 0; i < 32; i += 4)
47262 if (d->perm[i] + 2 != d->perm[i + 2])
47263 break;
47264 if (i < 32)
47266 /* See if bytes move the same in both lanes. If yes,
47267 vpblendw with immediate can be used. */
47268 for (i = 0; i < 16; i += 2)
47269 if (d->perm[i] + 16 != d->perm[i + 16])
47270 goto use_pblendvb;
47272 /* Use vpblendw. */
47273 for (i = 0; i < 16; ++i)
47274 mask |= (d->perm[i * 2] >= 32) << i;
47275 vmode = V16HImode;
47276 goto do_subreg;
47279 /* Use vpblendd. */
47280 for (i = 0; i < 8; ++i)
47281 mask |= (d->perm[i * 4] >= 32) << i;
47282 vmode = V8SImode;
47283 goto do_subreg;
47285 case V16HImode:
47286 /* See if words move in pairs. If yes, vpblendd can be used. */
47287 for (i = 0; i < 16; i += 2)
47288 if (d->perm[i] + 1 != d->perm[i + 1])
47289 break;
47290 if (i < 16)
47292 /* See if words move the same in both lanes. If not,
47293 vpblendvb must be used. */
47294 for (i = 0; i < 8; i++)
47295 if (d->perm[i] + 8 != d->perm[i + 8])
47297 /* Use vpblendvb. */
47298 for (i = 0; i < 32; ++i)
47299 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47301 vmode = V32QImode;
47302 nelt = 32;
47303 target = gen_reg_rtx (vmode);
47304 op0 = gen_lowpart (vmode, op0);
47305 op1 = gen_lowpart (vmode, op1);
47306 goto finish_pblendvb;
47309 /* Use vpblendw. */
47310 for (i = 0; i < 16; ++i)
47311 mask |= (d->perm[i] >= 16) << i;
47312 break;
47315 /* Use vpblendd. */
47316 for (i = 0; i < 8; ++i)
47317 mask |= (d->perm[i * 2] >= 16) << i;
47318 vmode = V8SImode;
47319 goto do_subreg;
47321 case V4DImode:
47322 /* Use vpblendd. */
47323 for (i = 0; i < 4; ++i)
47324 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47325 vmode = V8SImode;
47326 goto do_subreg;
47328 default:
47329 gcc_unreachable ();
47332 switch (vmode)
47334 case V8DFmode:
47335 case V8DImode:
47336 mmode = QImode;
47337 break;
47338 case V16SFmode:
47339 case V16SImode:
47340 mmode = HImode;
47341 break;
47342 case V32HImode:
47343 mmode = SImode;
47344 break;
47345 case V64QImode:
47346 mmode = DImode;
47347 break;
47348 default:
47349 mmode = VOIDmode;
47352 if (mmode != VOIDmode)
47353 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
47354 else
47355 maskop = GEN_INT (mask);
47357 /* This matches five different patterns with the different modes. */
47358 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
47359 x = gen_rtx_SET (target, x);
47360 emit_insn (x);
47361 if (target != d->target)
47362 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47364 return true;
47367 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47368 in terms of the variable form of vpermilps.
47370 Note that we will have already failed the immediate input vpermilps,
47371 which requires that the high and low part shuffle be identical; the
47372 variable form doesn't require that. */
47374 static bool
47375 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47377 rtx rperm[8], vperm;
47378 unsigned i;
47380 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47381 return false;
47383 /* We can only permute within the 128-bit lane. */
47384 for (i = 0; i < 8; ++i)
47386 unsigned e = d->perm[i];
47387 if (i < 4 ? e >= 4 : e < 4)
47388 return false;
47391 if (d->testing_p)
47392 return true;
47394 for (i = 0; i < 8; ++i)
47396 unsigned e = d->perm[i];
47398 /* Within each 128-bit lane, the elements of op0 are numbered
47399 from 0 and the elements of op1 are numbered from 4. */
47400 if (e >= 8 + 4)
47401 e -= 8;
47402 else if (e >= 4)
47403 e -= 4;
47405 rperm[i] = GEN_INT (e);
47408 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47409 vperm = force_reg (V8SImode, vperm);
47410 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47412 return true;
47415 /* Return true if permutation D can be performed as a VMODE permutation
47416 instead. */
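/* This holds when the elements of D move in aligned groups of
   d->nelt / GET_MODE_NUNITS (vmode) consecutive elements, so that each
   group can be treated as one wider VMODE element.  */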
47418 static bool
47419 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47421 unsigned int i, j, chunk;
47423 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47424 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47425 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47426 return false;
47428 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47429 return true;
47431 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47432 for (i = 0; i < d->nelt; i += chunk)
47433 if (d->perm[i] & (chunk - 1))
47434 return false;
47435 else
47436 for (j = 1; j < chunk; ++j)
47437 if (d->perm[i] + j != d->perm[i + j])
47438 return false;
47440 return true;
47443 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47444 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47446 static bool
47447 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47449 unsigned i, nelt, eltsz, mask;
47450 unsigned char perm[64];
47451 machine_mode vmode = V16QImode;
47452 rtx rperm[64], vperm, target, op0, op1;
47454 nelt = d->nelt;
47456 if (!d->one_operand_p)
47458 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47460 if (TARGET_AVX2
47461 && valid_perm_using_mode_p (V2TImode, d))
47463 if (d->testing_p)
47464 return true;
47466 /* Use vperm2i128 insn. The pattern uses
47467 V4DImode instead of V2TImode. */
47468 target = d->target;
47469 if (d->vmode != V4DImode)
47470 target = gen_reg_rtx (V4DImode);
47471 op0 = gen_lowpart (V4DImode, d->op0);
47472 op1 = gen_lowpart (V4DImode, d->op1);
47473 rperm[0]
47474 = GEN_INT ((d->perm[0] / (nelt / 2))
47475 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47476 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47477 if (target != d->target)
47478 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47479 return true;
47481 return false;
47484 else
47486 if (GET_MODE_SIZE (d->vmode) == 16)
47488 if (!TARGET_SSSE3)
47489 return false;
47491 else if (GET_MODE_SIZE (d->vmode) == 32)
47493 if (!TARGET_AVX2)
47494 return false;
47496 /* V4DImode should be already handled through
47497 expand_vselect by vpermq instruction. */
47498 gcc_assert (d->vmode != V4DImode);
47500 vmode = V32QImode;
47501 if (d->vmode == V8SImode
47502 || d->vmode == V16HImode
47503 || d->vmode == V32QImode)
47505 /* First see if vpermq can be used for
47506 V8SImode/V16HImode/V32QImode. */
47507 if (valid_perm_using_mode_p (V4DImode, d))
47509 for (i = 0; i < 4; i++)
47510 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47511 if (d->testing_p)
47512 return true;
47513 target = gen_reg_rtx (V4DImode);
47514 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47515 perm, 4, false))
47517 emit_move_insn (d->target,
47518 gen_lowpart (d->vmode, target));
47519 return true;
47521 return false;
47524 /* Next see if vpermd can be used. */
47525 if (valid_perm_using_mode_p (V8SImode, d))
47526 vmode = V8SImode;
47528 /* Or if vpermps can be used. */
47529 else if (d->vmode == V8SFmode)
47530 vmode = V8SImode;
47532 if (vmode == V32QImode)
47534 /* vpshufb only works intra-lane; it is not
47535 possible to shuffle bytes between the lanes. */
47536 for (i = 0; i < nelt; ++i)
47537 if ((d->perm[i] ^ i) & (nelt / 2))
47538 return false;
47541 else if (GET_MODE_SIZE (d->vmode) == 64)
47543 if (!TARGET_AVX512BW)
47544 return false;
47546 /* If vpermq didn't work, vpshufb won't work either. */
47547 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47548 return false;
47550 vmode = V64QImode;
47551 if (d->vmode == V16SImode
47552 || d->vmode == V32HImode
47553 || d->vmode == V64QImode)
47555 /* First see if vpermq can be used for
47556 V16SImode/V32HImode/V64QImode. */
47557 if (valid_perm_using_mode_p (V8DImode, d))
47559 for (i = 0; i < 8; i++)
47560 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47561 if (d->testing_p)
47562 return true;
47563 target = gen_reg_rtx (V8DImode);
47564 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47565 perm, 8, false))
47567 emit_move_insn (d->target,
47568 gen_lowpart (d->vmode, target));
47569 return true;
47571 return false;
47574 /* Next see if vpermd can be used. */
47575 if (valid_perm_using_mode_p (V16SImode, d))
47576 vmode = V16SImode;
47578 /* Or if vpermps can be used. */
47579 else if (d->vmode == V16SFmode)
47580 vmode = V16SImode;
47581 if (vmode == V64QImode)
47583 /* vpshufb only works intra-lane; it is not
47584 possible to shuffle bytes between the lanes. */
47585 for (i = 0; i < nelt; ++i)
47586 if ((d->perm[i] ^ i) & (nelt / 4))
47587 return false;
47590 else
47591 return false;
47594 if (d->testing_p)
47595 return true;
47597 if (vmode == V8SImode)
47598 for (i = 0; i < 8; ++i)
47599 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47600 else if (vmode == V16SImode)
47601 for (i = 0; i < 16; ++i)
47602 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47603 else
47605 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
47606 if (!d->one_operand_p)
47607 mask = 2 * nelt - 1;
47608 else if (vmode == V16QImode)
47609 mask = nelt - 1;
47610 else if (vmode == V64QImode)
47611 mask = nelt / 4 - 1;
47612 else
47613 mask = nelt / 2 - 1;
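	  /* pshufb, vpperm and the wider byte shuffles select individual
	     bytes, so expand each element selector into eltsz consecutive
	     byte selectors after masking it into the index range the
	     chosen instruction can address.  */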
47615 for (i = 0; i < nelt; ++i)
47617 unsigned j, e = d->perm[i] & mask;
47618 for (j = 0; j < eltsz; ++j)
47619 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47623 vperm = gen_rtx_CONST_VECTOR (vmode,
47624 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47625 vperm = force_reg (vmode, vperm);
47627 target = d->target;
47628 if (d->vmode != vmode)
47629 target = gen_reg_rtx (vmode);
47630 op0 = gen_lowpart (vmode, d->op0);
47631 if (d->one_operand_p)
47633 if (vmode == V16QImode)
47634 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47635 else if (vmode == V32QImode)
47636 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47637 else if (vmode == V64QImode)
47638 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47639 else if (vmode == V8SFmode)
47640 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47641 else if (vmode == V8SImode)
47642 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47643 else if (vmode == V16SFmode)
47644 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47645 else if (vmode == V16SImode)
47646 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47647 else
47648 gcc_unreachable ();
47650 else
47652 op1 = gen_lowpart (vmode, d->op1);
47653 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47655 if (target != d->target)
47656 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47658 return true;
47661 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47662 in a single instruction. */
47664 static bool
47665 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47667 unsigned i, nelt = d->nelt;
47668 unsigned char perm2[MAX_VECT_LEN];
47670 /* Check plain VEC_SELECT first, because AVX has instructions that could
47671 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47672 input where SEL+CONCAT may not. */
47673 if (d->one_operand_p)
47675 int mask = nelt - 1;
47676 bool identity_perm = true;
47677 bool broadcast_perm = true;
47679 for (i = 0; i < nelt; i++)
47681 perm2[i] = d->perm[i] & mask;
47682 if (perm2[i] != i)
47683 identity_perm = false;
47684 if (perm2[i])
47685 broadcast_perm = false;
47688 if (identity_perm)
47690 if (!d->testing_p)
47691 emit_move_insn (d->target, d->op0);
47692 return true;
47694 else if (broadcast_perm && TARGET_AVX2)
47696 /* Use vpbroadcast{b,w,d}. */
47697 rtx (*gen) (rtx, rtx) = NULL;
47698 switch (d->vmode)
47700 case V64QImode:
47701 if (TARGET_AVX512BW)
47702 gen = gen_avx512bw_vec_dupv64qi_1;
47703 break;
47704 case V32QImode:
47705 gen = gen_avx2_pbroadcastv32qi_1;
47706 break;
47707 case V32HImode:
47708 if (TARGET_AVX512BW)
47709 gen = gen_avx512bw_vec_dupv32hi_1;
47710 break;
47711 case V16HImode:
47712 gen = gen_avx2_pbroadcastv16hi_1;
47713 break;
47714 case V16SImode:
47715 if (TARGET_AVX512F)
47716 gen = gen_avx512f_vec_dupv16si_1;
47717 break;
47718 case V8SImode:
47719 gen = gen_avx2_pbroadcastv8si_1;
47720 break;
47721 case V16QImode:
47722 gen = gen_avx2_pbroadcastv16qi;
47723 break;
47724 case V8HImode:
47725 gen = gen_avx2_pbroadcastv8hi;
47726 break;
47727 case V16SFmode:
47728 if (TARGET_AVX512F)
47729 gen = gen_avx512f_vec_dupv16sf_1;
47730 break;
47731 case V8SFmode:
47732 gen = gen_avx2_vec_dupv8sf_1;
47733 break;
47734 case V8DFmode:
47735 if (TARGET_AVX512F)
47736 gen = gen_avx512f_vec_dupv8df_1;
47737 break;
47738 case V8DImode:
47739 if (TARGET_AVX512F)
47740 gen = gen_avx512f_vec_dupv8di_1;
47741 break;
47742 /* For other modes, prefer the other shuffles this function creates. */
47743 default: break;
47745 if (gen != NULL)
47747 if (!d->testing_p)
47748 emit_insn (gen (d->target, d->op0));
47749 return true;
47753 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47754 return true;
47756 /* There are plenty of patterns in sse.md that are written for
47757 SEL+CONCAT and are not replicated for a single op. Perhaps
47758 that should be changed, to avoid the nastiness here. */
47760 /* Recognize interleave style patterns, which means incrementing
47761 every other permutation operand. */
47762 for (i = 0; i < nelt; i += 2)
47764 perm2[i] = d->perm[i] & mask;
47765 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47767 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47768 d->testing_p))
47769 return true;
47771 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47772 if (nelt >= 4)
47774 for (i = 0; i < nelt; i += 4)
47776 perm2[i + 0] = d->perm[i + 0] & mask;
47777 perm2[i + 1] = d->perm[i + 1] & mask;
47778 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47779 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47782 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47783 d->testing_p))
47784 return true;
47788 /* Finally, try the fully general two operand permute. */
47789 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47790 d->testing_p))
47791 return true;
47793 /* Recognize interleave style patterns with reversed operands. */
47794 if (!d->one_operand_p)
47796 for (i = 0; i < nelt; ++i)
47798 unsigned e = d->perm[i];
47799 if (e >= nelt)
47800 e -= nelt;
47801 else
47802 e += nelt;
47803 perm2[i] = e;
47806 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47807 d->testing_p))
47808 return true;
47811 /* Try the SSE4.1 blend variable merge instructions. */
47812 if (expand_vec_perm_blend (d))
47813 return true;
47815 /* Try one of the AVX vpermil variable permutations. */
47816 if (expand_vec_perm_vpermil (d))
47817 return true;
47819 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47820 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47821 if (expand_vec_perm_pshufb (d))
47822 return true;
47824 /* Try the AVX2 vpalignr instruction. */
47825 if (expand_vec_perm_palignr (d, true))
47826 return true;
47828 /* Try the AVX512F vpermi2 instructions. */
47829 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47830 return true;
47832 return false;
47835 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47836 in terms of a pair of pshuflw + pshufhw instructions. */
47838 static bool
47839 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47841 unsigned char perm2[MAX_VECT_LEN];
47842 unsigned i;
47843 bool ok;
47845 if (d->vmode != V8HImode || !d->one_operand_p)
47846 return false;
47848 /* The two permutations only operate in 64-bit lanes. */
47849 for (i = 0; i < 4; ++i)
47850 if (d->perm[i] >= 4)
47851 return false;
47852 for (i = 4; i < 8; ++i)
47853 if (d->perm[i] < 4)
47854 return false;
47856 if (d->testing_p)
47857 return true;
47859 /* Emit the pshuflw. */
47860 memcpy (perm2, d->perm, 4);
47861 for (i = 4; i < 8; ++i)
47862 perm2[i] = i;
47863 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47864 gcc_assert (ok);
47866 /* Emit the pshufhw. */
47867 memcpy (perm2 + 4, d->perm + 4, 4);
47868 for (i = 0; i < 4; ++i)
47869 perm2[i] = i;
47870 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47871 gcc_assert (ok);
47873 return true;
47876 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47877 the permutation using the SSSE3 palignr instruction. This succeeds
47878 when all of the elements in PERM fit within one vector and we merely
47879 need to shift them down so that a single vector permutation has a
47880 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47881 the vpalignr instruction itself can perform the requested permutation. */
47883 static bool
47884 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47886 unsigned i, nelt = d->nelt;
47887 unsigned min, max, minswap, maxswap;
47888 bool in_order, ok, swap = false;
47889 rtx shift, target;
47890 struct expand_vec_perm_d dcopy;
47892 /* Even with AVX, palignr only operates on 128-bit vectors;
47893 with AVX2, palignr operates on both 128-bit lanes. */
47894 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47895 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47896 return false;
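  /* Find the smallest element index MIN used by the permutation; if all
     used indices fit in a window of nelt elements starting at MIN
     (nelt / 2 per lane for 32-byte modes), a palignr by MIN elements
     leaves a single-operand permutation.  The *SWAP values track the
     same thing with the two operands exchanged, in case only that order
     gives a usable window.  */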
47898 min = 2 * nelt;
47899 max = 0;
47900 minswap = 2 * nelt;
47901 maxswap = 0;
47902 for (i = 0; i < nelt; ++i)
47904 unsigned e = d->perm[i];
47905 unsigned eswap = d->perm[i] ^ nelt;
47906 if (GET_MODE_SIZE (d->vmode) == 32)
47908 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47909 eswap = e ^ (nelt / 2);
47911 if (e < min)
47912 min = e;
47913 if (e > max)
47914 max = e;
47915 if (eswap < minswap)
47916 minswap = eswap;
47917 if (eswap > maxswap)
47918 maxswap = eswap;
47920 if (min == 0
47921 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47923 if (d->one_operand_p
47924 || minswap == 0
47925 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47926 ? nelt / 2 : nelt))
47927 return false;
47928 swap = true;
47929 min = minswap;
47930 max = maxswap;
47933 /* Given that we have SSSE3, we know we'll be able to implement the
47934 single operand permutation after the palignr with pshufb for
47935 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47936 first. */
47937 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47938 return true;
47940 dcopy = *d;
47941 if (swap)
47943 dcopy.op0 = d->op1;
47944 dcopy.op1 = d->op0;
47945 for (i = 0; i < nelt; ++i)
47946 dcopy.perm[i] ^= nelt;
47949 in_order = true;
47950 for (i = 0; i < nelt; ++i)
47952 unsigned e = dcopy.perm[i];
47953 if (GET_MODE_SIZE (d->vmode) == 32
47954 && e >= nelt
47955 && (e & (nelt / 2 - 1)) < min)
47956 e = e - min - (nelt / 2);
47957 else
47958 e = e - min;
47959 if (e != i)
47960 in_order = false;
47961 dcopy.perm[i] = e;
47963 dcopy.one_operand_p = true;
47965 if (single_insn_only_p && !in_order)
47966 return false;
47968 /* For AVX2, test whether we can permute the result in one instruction. */
47969 if (d->testing_p)
47971 if (in_order)
47972 return true;
47973 dcopy.op1 = dcopy.op0;
47974 return expand_vec_perm_1 (&dcopy);
47977 shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode));
47978 if (GET_MODE_SIZE (d->vmode) == 16)
47980 target = gen_reg_rtx (TImode);
47981 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47982 gen_lowpart (TImode, dcopy.op0), shift));
47984 else
47986 target = gen_reg_rtx (V2TImode);
47987 emit_insn (gen_avx2_palignrv2ti (target,
47988 gen_lowpart (V2TImode, dcopy.op1),
47989 gen_lowpart (V2TImode, dcopy.op0),
47990 shift));
47993 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47995 /* Test for the degenerate case where the alignment by itself
47996 produces the desired permutation. */
47997 if (in_order)
47999 emit_move_insn (d->target, dcopy.op0);
48000 return true;
48003 ok = expand_vec_perm_1 (&dcopy);
48004 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
48006 return ok;
48009 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48010 the permutation using the SSE4_1 pblendv instruction. Potentially
48011 reduces the permutation from 2 pshufb insns plus an or to 1 pshufb plus a pblendv. */
48013 static bool
48014 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
48016 unsigned i, which, nelt = d->nelt;
48017 struct expand_vec_perm_d dcopy, dcopy1;
48018 machine_mode vmode = d->vmode;
48019 bool ok;
48021 /* Use the same checks as in expand_vec_perm_blend. */
48022 if (d->one_operand_p)
48023 return false;
48024 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48026 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48028 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48030 else
48031 return false;
48033 /* Figure out which permutation elements do not stay in their
48034 respective lanes. */
48035 for (i = 0, which = 0; i < nelt; ++i)
48037 unsigned e = d->perm[i];
48038 if (e != i)
48039 which |= (e < nelt ? 1 : 2);
48041 /* We can pblend the part where elements do not stay in their
48042 respective lanes only when those elements all come from the same
48043 half of the permutation, i.e. from the same operand.
48044 {0 1 8 3 4 5 9 7} is ok: the out-of-place elements 8 and 9 both
48045 come from the second operand (both >= 8).
48046 {0 1 8 3 4 5 2 7} is not ok: the out-of-place elements 2 and 8
48047 come from different operands (8 >= 8 but 2 < 8). */
48048 if (which != 1 && which != 2)
48049 return false;
48050 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
48051 return true;
48053 /* First we apply a one-operand permutation to the part whose
48054 elements do not stay in their respective lanes. */
48055 dcopy = *d;
48056 if (which == 2)
48057 dcopy.op0 = dcopy.op1 = d->op1;
48058 else
48059 dcopy.op0 = dcopy.op1 = d->op0;
48060 if (!d->testing_p)
48061 dcopy.target = gen_reg_rtx (vmode);
48062 dcopy.one_operand_p = true;
48064 for (i = 0; i < nelt; ++i)
48065 dcopy.perm[i] = d->perm[i] & (nelt - 1);
48067 ok = expand_vec_perm_1 (&dcopy);
48068 if (GET_MODE_SIZE (vmode) != 16 && !ok)
48069 return false;
48070 else
48071 gcc_assert (ok);
48072 if (d->testing_p)
48073 return true;
48075 /* Next we put permuted elements into their positions. */
48076 dcopy1 = *d;
48077 if (which == 2)
48078 dcopy1.op1 = dcopy.target;
48079 else
48080 dcopy1.op0 = dcopy.target;
48082 for (i = 0; i < nelt; ++i)
48083 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
48085 ok = expand_vec_perm_blend (&dcopy1);
48086 gcc_assert (ok);
48088 return true;
48091 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
48093 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48094 a two vector permutation into a single vector permutation by using
48095 an interleave operation to merge the vectors. */
48097 static bool
48098 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
48100 struct expand_vec_perm_d dremap, dfinal;
48101 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
48102 unsigned HOST_WIDE_INT contents;
48103 unsigned char remap[2 * MAX_VECT_LEN];
48104 rtx_insn *seq;
48105 bool ok, same_halves = false;
48107 if (GET_MODE_SIZE (d->vmode) == 16)
48109 if (d->one_operand_p)
48110 return false;
48112 else if (GET_MODE_SIZE (d->vmode) == 32)
48114 if (!TARGET_AVX)
48115 return false;
48116 /* For 32-byte modes allow even d->one_operand_p.
48117 The lack of cross-lane shuffling in some instructions
48118 might prevent a single insn shuffle. */
48119 dfinal = *d;
48120 dfinal.testing_p = true;
48121 /* If expand_vec_perm_interleave3 can expand this into
48122 a 3 insn sequence, give up and let it be expanded as
48123 a 3 insn sequence. While that is one insn longer,
48124 it doesn't need a memory operand, and in the common
48125 case where the interleave low and high permutations
48126 with the same operands are adjacent, both need only
48127 4 insns after CSE. */
48128 if (expand_vec_perm_interleave3 (&dfinal))
48129 return false;
48131 else
48132 return false;
48134 /* Examine from whence the elements come. */
48135 contents = 0;
48136 for (i = 0; i < nelt; ++i)
48137 contents |= HOST_WIDE_INT_1U << d->perm[i];
48139 memset (remap, 0xff, sizeof (remap));
48140 dremap = *d;
48142 if (GET_MODE_SIZE (d->vmode) == 16)
48144 unsigned HOST_WIDE_INT h1, h2, h3, h4;
48146 /* Split the two input vectors into 4 halves. */
48147 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
48148 h2 = h1 << nelt2;
48149 h3 = h2 << nelt2;
48150 h4 = h3 << nelt2;
48152 /* If the elements all come from the low halves, use interleave low;
48153 similarly for interleave high. If the elements are from mis-matched
48154 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
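/* Worked example (editorial note, not part of the original sources):
   for V4SImode with d->perm = {1 5 0 4}, contents has bits 0, 1, 4 and 5
   set, i.e. (h1 | h3), so the punpckl* case below applies.  dremap becomes
   the interleave-low permutation {0 4 1 5}, remap maps 0->0, 1->2, 4->1,
   5->3, and the final shuffle therefore uses perm {2 3 0 1}, a single
   pshufd on the interleaved result.  */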
48155 if ((contents & (h1 | h3)) == contents)
48157 /* punpckl* */
48158 for (i = 0; i < nelt2; ++i)
48160 remap[i] = i * 2;
48161 remap[i + nelt] = i * 2 + 1;
48162 dremap.perm[i * 2] = i;
48163 dremap.perm[i * 2 + 1] = i + nelt;
48165 if (!TARGET_SSE2 && d->vmode == V4SImode)
48166 dremap.vmode = V4SFmode;
48168 else if ((contents & (h2 | h4)) == contents)
48170 /* punpckh* */
48171 for (i = 0; i < nelt2; ++i)
48173 remap[i + nelt2] = i * 2;
48174 remap[i + nelt + nelt2] = i * 2 + 1;
48175 dremap.perm[i * 2] = i + nelt2;
48176 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
48178 if (!TARGET_SSE2 && d->vmode == V4SImode)
48179 dremap.vmode = V4SFmode;
48181 else if ((contents & (h1 | h4)) == contents)
48183 /* shufps */
48184 for (i = 0; i < nelt2; ++i)
48186 remap[i] = i;
48187 remap[i + nelt + nelt2] = i + nelt2;
48188 dremap.perm[i] = i;
48189 dremap.perm[i + nelt2] = i + nelt + nelt2;
48191 if (nelt != 4)
48193 /* shufpd */
48194 dremap.vmode = V2DImode;
48195 dremap.nelt = 2;
48196 dremap.perm[0] = 0;
48197 dremap.perm[1] = 3;
48200 else if ((contents & (h2 | h3)) == contents)
48202 /* shufps */
48203 for (i = 0; i < nelt2; ++i)
48205 remap[i + nelt2] = i;
48206 remap[i + nelt] = i + nelt2;
48207 dremap.perm[i] = i + nelt2;
48208 dremap.perm[i + nelt2] = i + nelt;
48210 if (nelt != 4)
48212 /* shufpd */
48213 dremap.vmode = V2DImode;
48214 dremap.nelt = 2;
48215 dremap.perm[0] = 1;
48216 dremap.perm[1] = 2;
48219 else
48220 return false;
48222 else
48224 unsigned int nelt4 = nelt / 4, nzcnt = 0;
48225 unsigned HOST_WIDE_INT q[8];
48226 unsigned int nonzero_halves[4];
48228 /* Split the two input vectors into 8 quarters. */
48229 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
48230 for (i = 1; i < 8; ++i)
48231 q[i] = q[0] << (nelt4 * i);
48232 for (i = 0; i < 4; ++i)
48233 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
48235 nonzero_halves[nzcnt] = i;
48236 ++nzcnt;
48239 if (nzcnt == 1)
48241 gcc_assert (d->one_operand_p);
48242 nonzero_halves[1] = nonzero_halves[0];
48243 same_halves = true;
48245 else if (d->one_operand_p)
48247 gcc_assert (nonzero_halves[0] == 0);
48248 gcc_assert (nonzero_halves[1] == 1);
48251 if (nzcnt <= 2)
48253 if (d->perm[0] / nelt2 == nonzero_halves[1])
48255 /* Attempt to increase the likelihood that dfinal
48256 shuffle will be intra-lane. */
48257 std::swap (nonzero_halves[0], nonzero_halves[1]);
48260 /* vperm2f128 or vperm2i128. */
48261 for (i = 0; i < nelt2; ++i)
48263 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
48264 remap[i + nonzero_halves[0] * nelt2] = i;
48265 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
48266 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
48269 if (d->vmode != V8SFmode
48270 && d->vmode != V4DFmode
48271 && d->vmode != V8SImode)
48273 dremap.vmode = V8SImode;
48274 dremap.nelt = 8;
48275 for (i = 0; i < 4; ++i)
48277 dremap.perm[i] = i + nonzero_halves[0] * 4;
48278 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
48282 else if (d->one_operand_p)
48283 return false;
48284 else if (TARGET_AVX2
48285 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
48287 /* vpunpckl* */
48288 for (i = 0; i < nelt4; ++i)
48290 remap[i] = i * 2;
48291 remap[i + nelt] = i * 2 + 1;
48292 remap[i + nelt2] = i * 2 + nelt2;
48293 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
48294 dremap.perm[i * 2] = i;
48295 dremap.perm[i * 2 + 1] = i + nelt;
48296 dremap.perm[i * 2 + nelt2] = i + nelt2;
48297 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
48300 else if (TARGET_AVX2
48301 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
48303 /* vpunpckh* */
48304 for (i = 0; i < nelt4; ++i)
48306 remap[i + nelt4] = i * 2;
48307 remap[i + nelt + nelt4] = i * 2 + 1;
48308 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
48309 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
48310 dremap.perm[i * 2] = i + nelt4;
48311 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48312 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48313 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48316 else
48317 return false;
48320 /* Use the remapping array set up above to move the elements from their
48321 swizzled locations into their final destinations. */
48322 dfinal = *d;
48323 for (i = 0; i < nelt; ++i)
48325 unsigned e = remap[d->perm[i]];
48326 gcc_assert (e < nelt);
48327 /* If same_halves is true, both halves of the remapped vector are the
48328 same. Avoid cross-lane accesses if possible. */
48329 if (same_halves && i >= nelt2)
48331 gcc_assert (e < nelt2);
48332 dfinal.perm[i] = e + nelt2;
48334 else
48335 dfinal.perm[i] = e;
48337 if (!d->testing_p)
48339 dremap.target = gen_reg_rtx (dremap.vmode);
48340 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48342 dfinal.op1 = dfinal.op0;
48343 dfinal.one_operand_p = true;
48345 /* Test if the final remap can be done with a single insn. For V4SFmode or
48346 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48347 start_sequence ();
48348 ok = expand_vec_perm_1 (&dfinal);
48349 seq = get_insns ();
48350 end_sequence ();
48352 if (!ok)
48353 return false;
48355 if (d->testing_p)
48356 return true;
48358 if (dremap.vmode != dfinal.vmode)
48360 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48361 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48364 ok = expand_vec_perm_1 (&dremap);
48365 gcc_assert (ok);
48367 emit_insn (seq);
48368 return true;
48371 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48372 a single vector cross-lane permutation into vpermq followed
48373 by any of the single insn permutations. */
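/* Illustration (editorial note, not part of the original sources):
   for V16HImode with d->perm = {0 1 2 3 12 13 14 15 8 9 10 11 8 9 10 11},
   the low half of the result only uses 64-bit quarters 0 and 3 and the
   high half only quarter 2, so the vpermq below gathers quarters
   {0 3 2 0} and the final shuffle is purely in-lane.  */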
48375 static bool
48376 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48378 struct expand_vec_perm_d dremap, dfinal;
48379 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48380 unsigned contents[2];
48381 bool ok;
48383 if (!(TARGET_AVX2
48384 && (d->vmode == V32QImode || d->vmode == V16HImode)
48385 && d->one_operand_p))
48386 return false;
48388 contents[0] = 0;
48389 contents[1] = 0;
48390 for (i = 0; i < nelt2; ++i)
48392 contents[0] |= 1u << (d->perm[i] / nelt4);
48393 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48396 for (i = 0; i < 2; ++i)
48398 unsigned int cnt = 0;
48399 for (j = 0; j < 4; ++j)
48400 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48401 return false;
48404 if (d->testing_p)
48405 return true;
48407 dremap = *d;
48408 dremap.vmode = V4DImode;
48409 dremap.nelt = 4;
48410 dremap.target = gen_reg_rtx (V4DImode);
48411 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48412 dremap.op1 = dremap.op0;
48413 dremap.one_operand_p = true;
48414 for (i = 0; i < 2; ++i)
48416 unsigned int cnt = 0;
48417 for (j = 0; j < 4; ++j)
48418 if ((contents[i] & (1u << j)) != 0)
48419 dremap.perm[2 * i + cnt++] = j;
48420 for (; cnt < 2; ++cnt)
48421 dremap.perm[2 * i + cnt] = 0;
48424 dfinal = *d;
48425 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48426 dfinal.op1 = dfinal.op0;
48427 dfinal.one_operand_p = true;
48428 for (i = 0, j = 0; i < nelt; ++i)
48430 if (i == nelt2)
48431 j = 2;
48432 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48433 if ((d->perm[i] / nelt4) == dremap.perm[j])
48435 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48436 dfinal.perm[i] |= nelt4;
48437 else
48438 gcc_unreachable ();
48441 ok = expand_vec_perm_1 (&dremap);
48442 gcc_assert (ok);
48444 ok = expand_vec_perm_1 (&dfinal);
48445 gcc_assert (ok);
48447 return true;
48450 /* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
48451 a vector permutation using two instructions, vperm2f128 or
48452 vperm2i128 followed by any single in-lane permutation. */
48454 static bool
48455 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48457 struct expand_vec_perm_d dfirst, dsecond;
48458 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48459 bool ok;
48461 if (!TARGET_AVX
48462 || GET_MODE_SIZE (d->vmode) != 32
48463 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48464 return false;
48466 dsecond = *d;
48467 dsecond.one_operand_p = false;
48468 dsecond.testing_p = true;
48470 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48471 immediate. For perm < 16 the second permutation uses
48472 d->op0 as first operand, for perm >= 16 it uses d->op1
48473 as first operand. The second operand is the result of
48474 vperm2[fi]128. */
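/* Example of the encoding (editorial note, not part of the original
   sources): for V4DFmode, perm == 6 selects op1's low lane for the
   result's low lane and op0's high lane for the high lane, so dfirst
   computes {4 5 2 3} from the concatenated {0 1 2 3 | 4 5 6 7} and the
   vperm2f128 immediate is ((6 << 2) | 6) & 0x33 == 0x12.  */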
48475 for (perm = 0; perm < 32; perm++)
48477 /* Ignore permutations which do not move anything cross-lane. */
48478 if (perm < 16)
48480 /* The second shuffle for e.g. V4DFmode has
48481 0123 and ABCD operands.
48482 Ignore AB23, as 23 is already in the second lane
48483 of the first operand. */
48484 if ((perm & 0xc) == (1 << 2)) continue;
48485 /* And 01CD, as 01 is in the first lane of the first
48486 operand. */
48487 if ((perm & 3) == 0) continue;
48488 /* And 4567, as then the vperm2[fi]128 doesn't change
48489 anything on the original 4567 second operand. */
48490 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48492 else
48494 /* The second shuffle for e.g. V4DFmode has
48495 4567 and ABCD operands.
48496 Ignore AB67, as 67 is already in the second lane
48497 of the first operand. */
48498 if ((perm & 0xc) == (3 << 2)) continue;
48499 /* And 45CD, as 45 is in the first lane of the first
48500 operand. */
48501 if ((perm & 3) == 2) continue;
48502 /* And 0123, as then the vperm2[fi]128 doesn't change
48503 anything on the original 0123 first operand. */
48504 if ((perm & 0xf) == (1 << 2)) continue;
48507 for (i = 0; i < nelt; i++)
48509 j = d->perm[i] / nelt2;
48510 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48511 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48512 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48513 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48514 else
48515 break;
48518 if (i == nelt)
48520 start_sequence ();
48521 ok = expand_vec_perm_1 (&dsecond);
48522 end_sequence ();
48524 else
48525 ok = false;
48527 if (ok)
48529 if (d->testing_p)
48530 return true;
48532 /* Found a usable second shuffle. dfirst will be
48533 vperm2f128 on d->op0 and d->op1. */
48534 dsecond.testing_p = false;
48535 dfirst = *d;
48536 dfirst.target = gen_reg_rtx (d->vmode);
48537 for (i = 0; i < nelt; i++)
48538 dfirst.perm[i] = (i & (nelt2 - 1))
48539 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48541 canonicalize_perm (&dfirst);
48542 ok = expand_vec_perm_1 (&dfirst);
48543 gcc_assert (ok);
48545 /* And dsecond is some single insn shuffle, taking
48546 d->op0 and result of vperm2f128 (if perm < 16) or
48547 d->op1 and result of vperm2f128 (otherwise). */
48548 if (perm >= 16)
48549 dsecond.op0 = dsecond.op1;
48550 dsecond.op1 = dfirst.target;
48552 ok = expand_vec_perm_1 (&dsecond);
48553 gcc_assert (ok);
48555 return true;
48558 /* For one operand, the only useful vperm2f128 permutation is 0x01
48559 aka lanes swap. */
48560 if (d->one_operand_p)
48561 return false;
48564 return false;
48567 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48568 a two vector permutation using 2 intra-lane interleave insns
48569 and cross-lane shuffle for 32-byte vectors. */
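/* Example of the pattern matched below (editorial note, not part of the
   original sources): for V8SFmode, d->perm = {0 8 1 9 2 10 3 11} has
   d->perm[0] == 0 and expands via gen_vec_interleave_lowv8sf, while
   {4 12 5 13 6 14 7 15} has d->perm[0] == nelt / 2 and uses the high
   variant.  */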
48571 static bool
48572 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48574 unsigned i, nelt;
48575 rtx (*gen) (rtx, rtx, rtx);
48577 if (d->one_operand_p)
48578 return false;
48579 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48581 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48583 else
48584 return false;
48586 nelt = d->nelt;
48587 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48588 return false;
48589 for (i = 0; i < nelt; i += 2)
48590 if (d->perm[i] != d->perm[0] + i / 2
48591 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48592 return false;
48594 if (d->testing_p)
48595 return true;
48597 switch (d->vmode)
48599 case V32QImode:
48600 if (d->perm[0])
48601 gen = gen_vec_interleave_highv32qi;
48602 else
48603 gen = gen_vec_interleave_lowv32qi;
48604 break;
48605 case V16HImode:
48606 if (d->perm[0])
48607 gen = gen_vec_interleave_highv16hi;
48608 else
48609 gen = gen_vec_interleave_lowv16hi;
48610 break;
48611 case V8SImode:
48612 if (d->perm[0])
48613 gen = gen_vec_interleave_highv8si;
48614 else
48615 gen = gen_vec_interleave_lowv8si;
48616 break;
48617 case V4DImode:
48618 if (d->perm[0])
48619 gen = gen_vec_interleave_highv4di;
48620 else
48621 gen = gen_vec_interleave_lowv4di;
48622 break;
48623 case V8SFmode:
48624 if (d->perm[0])
48625 gen = gen_vec_interleave_highv8sf;
48626 else
48627 gen = gen_vec_interleave_lowv8sf;
48628 break;
48629 case V4DFmode:
48630 if (d->perm[0])
48631 gen = gen_vec_interleave_highv4df;
48632 else
48633 gen = gen_vec_interleave_lowv4df;
48634 break;
48635 default:
48636 gcc_unreachable ();
48639 emit_insn (gen (d->target, d->op0, d->op1));
48640 return true;
48643 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
48644 a single vector permutation using a single intra-lane vector
48645 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48646 the non-swapped and swapped vectors together. */
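/* Worked example (editorial note, not part of the original sources):
   for V4DFmode with d->perm = {1 2 3 0}, dfirst becomes the in-lane
   permutation {1 0 3 2}, dsecond swaps the two 128-bit lanes of that
   result, and the blend mask msk = 10 (binary 1010) picks the swapped
   copy for lanes 1 and 3, giving {1 2 3 0}.  */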
48648 static bool
48649 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48651 struct expand_vec_perm_d dfirst, dsecond;
48652 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48653 rtx_insn *seq;
48654 bool ok;
48655 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48657 if (!TARGET_AVX
48658 || TARGET_AVX2
48659 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48660 || !d->one_operand_p)
48661 return false;
48663 dfirst = *d;
48664 for (i = 0; i < nelt; i++)
48665 dfirst.perm[i] = 0xff;
48666 for (i = 0, msk = 0; i < nelt; i++)
48668 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48669 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48670 return false;
48671 dfirst.perm[j] = d->perm[i];
48672 if (j != i)
48673 msk |= (1 << i);
48675 for (i = 0; i < nelt; i++)
48676 if (dfirst.perm[i] == 0xff)
48677 dfirst.perm[i] = i;
48679 if (!d->testing_p)
48680 dfirst.target = gen_reg_rtx (dfirst.vmode);
48682 start_sequence ();
48683 ok = expand_vec_perm_1 (&dfirst);
48684 seq = get_insns ();
48685 end_sequence ();
48687 if (!ok)
48688 return false;
48690 if (d->testing_p)
48691 return true;
48693 emit_insn (seq);
48695 dsecond = *d;
48696 dsecond.op0 = dfirst.target;
48697 dsecond.op1 = dfirst.target;
48698 dsecond.one_operand_p = true;
48699 dsecond.target = gen_reg_rtx (dsecond.vmode);
48700 for (i = 0; i < nelt; i++)
48701 dsecond.perm[i] = i ^ nelt2;
48703 ok = expand_vec_perm_1 (&dsecond);
48704 gcc_assert (ok);
48706 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48707 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48708 return true;
48711 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
48712 permutation using two vperm2f128, followed by a vshufpd insn blending
48713 the two vectors together. */
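/* Worked example (editorial note, not part of the original sources):
   for d->perm = {2 5 1 6}, dfirst = {2 3 0 1} and dsecond = {4 5 6 7}
   are the two vperm2f128 results, and dthird = {0 5 3 6} is a vshufpd
   taking result elements 0 and 2 from dfirst and elements 1 and 3 from
   dsecond, which yields exactly {2 5 1 6}.  */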
48715 static bool
48716 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48718 struct expand_vec_perm_d dfirst, dsecond, dthird;
48719 bool ok;
48721 if (!TARGET_AVX || (d->vmode != V4DFmode))
48722 return false;
48724 if (d->testing_p)
48725 return true;
48727 dfirst = *d;
48728 dsecond = *d;
48729 dthird = *d;
48731 dfirst.perm[0] = (d->perm[0] & ~1);
48732 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48733 dfirst.perm[2] = (d->perm[2] & ~1);
48734 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48735 dsecond.perm[0] = (d->perm[1] & ~1);
48736 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48737 dsecond.perm[2] = (d->perm[3] & ~1);
48738 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48739 dthird.perm[0] = (d->perm[0] % 2);
48740 dthird.perm[1] = (d->perm[1] % 2) + 4;
48741 dthird.perm[2] = (d->perm[2] % 2) + 2;
48742 dthird.perm[3] = (d->perm[3] % 2) + 6;
48744 dfirst.target = gen_reg_rtx (dfirst.vmode);
48745 dsecond.target = gen_reg_rtx (dsecond.vmode);
48746 dthird.op0 = dfirst.target;
48747 dthird.op1 = dsecond.target;
48748 dthird.one_operand_p = false;
48750 canonicalize_perm (&dfirst);
48751 canonicalize_perm (&dsecond);
48753 ok = expand_vec_perm_1 (&dfirst)
48754 && expand_vec_perm_1 (&dsecond)
48755 && expand_vec_perm_1 (&dthird);
48757 gcc_assert (ok);
48759 return true;
48762 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48763 permutation with two pshufb insns and an ior. We should have already
48764 failed all two instruction sequences. */
48766 static bool
48767 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48769 rtx rperm[2][16], vperm, l, h, op, m128;
48770 unsigned int i, nelt, eltsz;
48772 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48773 return false;
48774 gcc_assert (!d->one_operand_p);
48776 if (d->testing_p)
48777 return true;
48779 nelt = d->nelt;
48780 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48782 /* Generate two permutation masks. If the required element is within
48783 the given vector it is shuffled into the proper lane. If the required
48784 element is in the other vector, force a zero into the lane by setting
48785 bit 7 in the permutation mask. */
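/* Example masks (editorial note, not part of the original sources;
   a permutation this regular would normally be caught earlier): for
   V8HImode with d->perm = {0 8 1 9 2 10 3 11}, the mask applied to op0
   is {0 1 -128 -128 2 3 -128 -128 4 5 -128 -128 6 7 -128 -128} and the
   mask applied to op1 is the same pattern shifted by one word, so the
   final ior merges the two half-populated vectors.  */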
48786 m128 = GEN_INT (-128);
48787 for (i = 0; i < nelt; ++i)
48789 unsigned j, e = d->perm[i];
48790 unsigned which = (e >= nelt);
48791 if (e >= nelt)
48792 e -= nelt;
48794 for (j = 0; j < eltsz; ++j)
48796 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48797 rperm[1-which][i*eltsz + j] = m128;
48801 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48802 vperm = force_reg (V16QImode, vperm);
48804 l = gen_reg_rtx (V16QImode);
48805 op = gen_lowpart (V16QImode, d->op0);
48806 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48808 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48809 vperm = force_reg (V16QImode, vperm);
48811 h = gen_reg_rtx (V16QImode);
48812 op = gen_lowpart (V16QImode, d->op1);
48813 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48815 op = d->target;
48816 if (d->vmode != V16QImode)
48817 op = gen_reg_rtx (V16QImode);
48818 emit_insn (gen_iorv16qi3 (op, l, h));
48819 if (op != d->target)
48820 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48822 return true;
48825 /* Implement an arbitrary permutation of a single V32QImode or V16HImode
48826 operand with two vpshufb insns, vpermq and vpor. We should have already
48827 failed all two or three instruction sequences. */
48829 static bool
48830 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48832 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48833 unsigned int i, nelt, eltsz;
48835 if (!TARGET_AVX2
48836 || !d->one_operand_p
48837 || (d->vmode != V32QImode && d->vmode != V16HImode))
48838 return false;
48840 if (d->testing_p)
48841 return true;
48843 nelt = d->nelt;
48844 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48846 /* Generate two permutation masks. If the required element is within
48847 the same lane, it is shuffled in. If the required element is from the
48848 other lane, force a zero by setting bit 7 in the permutation mask.
48849 The other mask has non-negative entries where the element is requested
48850 from the other lane; those entries are also moved to the other lane,
48851 so that the result of vpshufb has the two V2TImode halves
48852 swapped. */
48853 m128 = GEN_INT (-128);
48854 for (i = 0; i < nelt; ++i)
48856 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48857 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48859 for (j = 0; j < eltsz; ++j)
48861 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48862 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48866 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48867 vperm = force_reg (V32QImode, vperm);
48869 h = gen_reg_rtx (V32QImode);
48870 op = gen_lowpart (V32QImode, d->op0);
48871 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48873 /* Swap the 128-bit lanes of h into hp. */
48874 hp = gen_reg_rtx (V4DImode);
48875 op = gen_lowpart (V4DImode, h);
48876 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48877 const1_rtx));
48879 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48880 vperm = force_reg (V32QImode, vperm);
48882 l = gen_reg_rtx (V32QImode);
48883 op = gen_lowpart (V32QImode, d->op0);
48884 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48886 op = d->target;
48887 if (d->vmode != V32QImode)
48888 op = gen_reg_rtx (V32QImode);
48889 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48890 if (op != d->target)
48891 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48893 return true;
48896 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48897 and extract-odd permutations of two V32QImode or V16HImode operands
48898 with two vpshufb insns, vpor and vpermq. We should have already
48899 failed all two or three instruction sequences. */
48901 static bool
48902 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48904 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48905 unsigned int i, nelt, eltsz;
48907 if (!TARGET_AVX2
48908 || d->one_operand_p
48909 || (d->vmode != V32QImode && d->vmode != V16HImode))
48910 return false;
48912 for (i = 0; i < d->nelt; ++i)
48913 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48914 return false;
48916 if (d->testing_p)
48917 return true;
48919 nelt = d->nelt;
48920 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
48922 /* Generate two permutation masks. In the first permutation mask
48923 the first quarter will contain indexes for the first half
48924 of the op0, the second quarter will contain bit 7 set, third quarter
48925 will contain indexes for the second half of the op0 and the
48926 last quarter bit 7 set. In the second permutation mask
48927 the first quarter will contain bit 7 set, the second quarter
48928 indexes for the first half of the op1, the third quarter bit 7 set
48929 and last quarter indexes for the second half of the op1.
48930 I.e. the first mask e.g. for V32QImode extract even will be:
48931 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48932 (all values masked with 0xf except for -128) and second mask
48933 for extract even will be
48934 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48935 m128 = GEN_INT (-128);
48936 for (i = 0; i < nelt; ++i)
48938 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48939 unsigned which = d->perm[i] >= nelt;
48940 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48942 for (j = 0; j < eltsz; ++j)
48944 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48945 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48949 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48950 vperm = force_reg (V32QImode, vperm);
48952 l = gen_reg_rtx (V32QImode);
48953 op = gen_lowpart (V32QImode, d->op0);
48954 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48956 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48957 vperm = force_reg (V32QImode, vperm);
48959 h = gen_reg_rtx (V32QImode);
48960 op = gen_lowpart (V32QImode, d->op1);
48961 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48963 ior = gen_reg_rtx (V32QImode);
48964 emit_insn (gen_iorv32qi3 (ior, l, h));
48966 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
48967 op = gen_reg_rtx (V4DImode);
48968 ior = gen_lowpart (V4DImode, ior);
48969 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48970 const1_rtx, GEN_INT (3)));
48971 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48973 return true;
48976 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48977 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48978 with two "and" and "pack" or two "shift" and "pack" insns. We should
48979 have already failed all two instruction sequences. */
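/* Example (editorial note, not part of the original sources): for
   V8HImode even extraction, both operands are viewed as V4SImode,
   masked with 0x0000ffff per dword to clear the odd halfwords, and a
   single packusdw then concatenates the surviving low halfwords into
   {0 2 4 6 8 10 12 14}.  For odd extraction the masking is replaced by
   a logical right shift of each dword by 16.  */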
48981 static bool
48982 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48984 rtx op, dop0, dop1, t, rperm[16];
48985 unsigned i, odd, c, s, nelt = d->nelt;
48986 bool end_perm = false;
48987 machine_mode half_mode;
48988 rtx (*gen_and) (rtx, rtx, rtx);
48989 rtx (*gen_pack) (rtx, rtx, rtx);
48990 rtx (*gen_shift) (rtx, rtx, rtx);
48992 if (d->one_operand_p)
48993 return false;
48995 switch (d->vmode)
48997 case V8HImode:
48998 /* Required for "pack". */
48999 if (!TARGET_SSE4_1)
49000 return false;
49001 c = 0xffff;
49002 s = 16;
49003 half_mode = V4SImode;
49004 gen_and = gen_andv4si3;
49005 gen_pack = gen_sse4_1_packusdw;
49006 gen_shift = gen_lshrv4si3;
49007 break;
49008 case V16QImode:
49009 /* No check as all instructions are SSE2. */
49010 c = 0xff;
49011 s = 8;
49012 half_mode = V8HImode;
49013 gen_and = gen_andv8hi3;
49014 gen_pack = gen_sse2_packuswb;
49015 gen_shift = gen_lshrv8hi3;
49016 break;
49017 case V16HImode:
49018 if (!TARGET_AVX2)
49019 return false;
49020 c = 0xffff;
49021 s = 16;
49022 half_mode = V8SImode;
49023 gen_and = gen_andv8si3;
49024 gen_pack = gen_avx2_packusdw;
49025 gen_shift = gen_lshrv8si3;
49026 end_perm = true;
49027 break;
49028 case V32QImode:
49029 if (!TARGET_AVX2)
49030 return false;
49031 c = 0xff;
49032 s = 8;
49033 half_mode = V16HImode;
49034 gen_and = gen_andv16hi3;
49035 gen_pack = gen_avx2_packuswb;
49036 gen_shift = gen_lshrv16hi3;
49037 end_perm = true;
49038 break;
49039 default:
49040 /* Only for V8HI, V16QI, V16HI and V32QI modes is this approach more
49041 profitable than general shuffles. */
49042 return false;
49045 /* Check that permutation is even or odd. */
49046 odd = d->perm[0];
49047 if (odd > 1)
49048 return false;
49050 for (i = 1; i < nelt; ++i)
49051 if (d->perm[i] != 2 * i + odd)
49052 return false;
49054 if (d->testing_p)
49055 return true;
49057 dop0 = gen_reg_rtx (half_mode);
49058 dop1 = gen_reg_rtx (half_mode);
49059 if (odd == 0)
49061 for (i = 0; i < nelt / 2; i++)
49062 rperm[i] = GEN_INT (c);
49063 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
49064 t = force_reg (half_mode, t);
49065 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
49066 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
49068 else
49070 emit_insn (gen_shift (dop0,
49071 gen_lowpart (half_mode, d->op0),
49072 GEN_INT (s)));
49073 emit_insn (gen_shift (dop1,
49074 gen_lowpart (half_mode, d->op1),
49075 GEN_INT (s)));
49077 /* For the AVX2 256-bit case we need to permute the pack result. */
49078 if (TARGET_AVX2 && end_perm)
49080 op = gen_reg_rtx (d->vmode);
49081 t = gen_reg_rtx (V4DImode);
49082 emit_insn (gen_pack (op, dop0, dop1));
49083 emit_insn (gen_avx2_permv4di_1 (t,
49084 gen_lowpart (V4DImode, op),
49085 const0_rtx,
49086 const2_rtx,
49087 const1_rtx,
49088 GEN_INT (3)));
49089 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
49091 else
49092 emit_insn (gen_pack (d->target, dop0, dop1));
49094 return true;
49097 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
49098 and extract-odd permutations. */
49100 static bool
49101 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
49103 rtx t1, t2, t3, t4, t5;
49105 switch (d->vmode)
49107 case V4DFmode:
49108 if (d->testing_p)
49109 break;
49110 t1 = gen_reg_rtx (V4DFmode);
49111 t2 = gen_reg_rtx (V4DFmode);
49113 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49114 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
49115 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
49117 /* Now an unpck[lh]pd will produce the result required. */
49118 if (odd)
49119 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
49120 else
49121 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
49122 emit_insn (t3);
49123 break;
49125 case V8SFmode:
49127 int mask = odd ? 0xdd : 0x88;
49129 if (d->testing_p)
49130 break;
49131 t1 = gen_reg_rtx (V8SFmode);
49132 t2 = gen_reg_rtx (V8SFmode);
49133 t3 = gen_reg_rtx (V8SFmode);
49135 /* Shuffle within the 128-bit lanes to produce:
49136 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
49137 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
49138 GEN_INT (mask)));
49140 /* Shuffle the lanes around to produce:
49141 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
49142 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
49143 GEN_INT (0x3)));
49145 /* Shuffle within the 128-bit lanes to produce:
49146 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
49147 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
49149 /* Shuffle within the 128-bit lanes to produce:
49150 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
49151 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
49153 /* Shuffle the lanes around to produce:
49154 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
49155 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
49156 GEN_INT (0x20)));
49158 break;
49160 case V2DFmode:
49161 case V4SFmode:
49162 case V2DImode:
49163 case V4SImode:
49164 /* These are always directly implementable by expand_vec_perm_1. */
49165 gcc_unreachable ();
49167 case V8HImode:
49168 if (TARGET_SSE4_1)
49169 return expand_vec_perm_even_odd_pack (d);
49170 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
49171 return expand_vec_perm_pshufb2 (d);
49172 else
49174 if (d->testing_p)
49175 break;
49176 /* We need 2*log2(N)-1 operations to achieve odd/even
49177 with interleave. */
49178 t1 = gen_reg_rtx (V8HImode);
49179 t2 = gen_reg_rtx (V8HImode);
49180 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
49181 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
49182 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
49183 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
49184 if (odd)
49185 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
49186 else
49187 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
49188 emit_insn (t3);
49190 break;
49192 case V16QImode:
49193 return expand_vec_perm_even_odd_pack (d);
49195 case V16HImode:
49196 case V32QImode:
49197 return expand_vec_perm_even_odd_pack (d);
49199 case V4DImode:
49200 if (!TARGET_AVX2)
49202 struct expand_vec_perm_d d_copy = *d;
49203 d_copy.vmode = V4DFmode;
49204 if (d->testing_p)
49205 d_copy.target = gen_lowpart (V4DFmode, d->target);
49206 else
49207 d_copy.target = gen_reg_rtx (V4DFmode);
49208 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
49209 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
49210 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49212 if (!d->testing_p)
49213 emit_move_insn (d->target,
49214 gen_lowpart (V4DImode, d_copy.target));
49215 return true;
49217 return false;
49220 if (d->testing_p)
49221 break;
49223 t1 = gen_reg_rtx (V4DImode);
49224 t2 = gen_reg_rtx (V4DImode);
49226 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49227 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
49228 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
49230 /* Now a vpunpck[lh]qdq will produce the result required. */
49231 if (odd)
49232 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
49233 else
49234 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
49235 emit_insn (t3);
49236 break;
49238 case V8SImode:
49239 if (!TARGET_AVX2)
49241 struct expand_vec_perm_d d_copy = *d;
49242 d_copy.vmode = V8SFmode;
49243 if (d->testing_p)
49244 d_copy.target = gen_lowpart (V8SFmode, d->target);
49245 else
49246 d_copy.target = gen_reg_rtx (V8SFmode);
49247 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
49248 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
49249 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49251 if (!d->testing_p)
49252 emit_move_insn (d->target,
49253 gen_lowpart (V8SImode, d_copy.target));
49254 return true;
49256 return false;
49259 if (d->testing_p)
49260 break;
49262 t1 = gen_reg_rtx (V8SImode);
49263 t2 = gen_reg_rtx (V8SImode);
49264 t3 = gen_reg_rtx (V4DImode);
49265 t4 = gen_reg_rtx (V4DImode);
49266 t5 = gen_reg_rtx (V4DImode);
49268 /* Shuffle the lanes around into
49269 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
49270 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
49271 gen_lowpart (V4DImode, d->op1),
49272 GEN_INT (0x20)));
49273 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
49274 gen_lowpart (V4DImode, d->op1),
49275 GEN_INT (0x31)));
49277 /* Swap the 2nd and 3rd position in each lane into
49278 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
49279 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
49280 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49281 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
49282 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49284 /* Now a vpunpck[lh]qdq will produce
49285 { 0 2 4 6 8 a c e } or { 1 3 5 7 9 b d f } respectively. */
49286 if (odd)
49287 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
49288 gen_lowpart (V4DImode, t2));
49289 else
49290 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
49291 gen_lowpart (V4DImode, t2));
49292 emit_insn (t3);
49293 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
49294 break;
49296 default:
49297 gcc_unreachable ();
49300 return true;
49303 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
49304 extract-even and extract-odd permutations. */
49306 static bool
49307 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
49309 unsigned i, odd, nelt = d->nelt;
49311 odd = d->perm[0];
49312 if (odd != 0 && odd != 1)
49313 return false;
49315 for (i = 1; i < nelt; ++i)
49316 if (d->perm[i] != 2 * i + odd)
49317 return false;
49319 return expand_vec_perm_even_odd_1 (d, odd);
49322 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
49323 permutations. We assume that expand_vec_perm_1 has already failed. */
49325 static bool
49326 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49328 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49329 machine_mode vmode = d->vmode;
49330 unsigned char perm2[4];
49331 rtx op0 = d->op0, dest;
49332 bool ok;
49334 switch (vmode)
49336 case V4DFmode:
49337 case V8SFmode:
49338 /* These are special-cased in sse.md so that we can optionally
49339 use the vbroadcast instruction. They expand to two insns
49340 if the input happens to be in a register. */
49341 gcc_unreachable ();
49343 case V2DFmode:
49344 case V2DImode:
49345 case V4SFmode:
49346 case V4SImode:
49347 /* These are always implementable using standard shuffle patterns. */
49348 gcc_unreachable ();
49350 case V8HImode:
49351 case V16QImode:
49352 /* These can be implemented via interleave. We save one insn by
49353 stopping once we have promoted to V4SImode and then use pshufd. */
49354 if (d->testing_p)
49355 return true;
49358 rtx dest;
49359 rtx (*gen) (rtx, rtx, rtx)
49360 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49361 : gen_vec_interleave_lowv8hi;
49363 if (elt >= nelt2)
49365 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49366 : gen_vec_interleave_highv8hi;
49367 elt -= nelt2;
49369 nelt2 /= 2;
49371 dest = gen_reg_rtx (vmode);
49372 emit_insn (gen (dest, op0, op0));
49373 vmode = get_mode_wider_vector (vmode);
49374 op0 = gen_lowpart (vmode, dest);
49376 while (vmode != V4SImode);
49378 memset (perm2, elt, 4);
49379 dest = gen_reg_rtx (V4SImode);
49380 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49381 gcc_assert (ok);
49382 if (!d->testing_p)
49383 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49384 return true;
49386 case V64QImode:
49387 case V32QImode:
49388 case V16HImode:
49389 case V8SImode:
49390 case V4DImode:
49391 /* For AVX2 broadcasts of the first element vpbroadcast* or
49392 vpermq should be used by expand_vec_perm_1. */
49393 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49394 return false;
49396 default:
49397 gcc_unreachable ();
49401 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
49402 broadcast permutations. */
49404 static bool
49405 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49407 unsigned i, elt, nelt = d->nelt;
49409 if (!d->one_operand_p)
49410 return false;
49412 elt = d->perm[0];
49413 for (i = 1; i < nelt; ++i)
49414 if (d->perm[i] != elt)
49415 return false;
49417 return expand_vec_perm_broadcast_1 (d);
49420 /* Implement arbitrary permutations of two V64QImode operands
49421 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49422 static bool
49423 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49425 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49426 return false;
49428 if (d->testing_p)
49429 return true;
49431 struct expand_vec_perm_d ds[2];
49432 rtx rperm[128], vperm, target0, target1;
49433 unsigned int i, nelt;
49434 machine_mode vmode;
49436 nelt = d->nelt;
49437 vmode = V64QImode;
49439 for (i = 0; i < 2; i++)
49441 ds[i] = *d;
49442 ds[i].vmode = V32HImode;
49443 ds[i].nelt = 32;
49444 ds[i].target = gen_reg_rtx (V32HImode);
49445 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49446 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49449 /* Prepare permutations such that the first one takes care of
49450 putting the even bytes into the right positions or one position
49451 higher (ds[0]) and the second one takes care of
49452 putting the odd bytes into the right positions or one position
49453 lower (ds[1]). */
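/* Illustration (editorial note, not part of the original sources):
   suppose d->perm[10] == 77, i.e. result byte 10 wants the high byte of
   word 38 of the concatenated operands.  Index 10 is even, so
   ds[0].perm[5] = 38 makes the vpermi2w place word 38 in word slot 5
   (result bytes 10 and 11); the byte mask entry rperm[10]
   = (10 & 14) + (77 & 1) = 11 then selects its high byte, while the
   mask for the odd-byte shuffle holds -1 (zero) there.  The final vpor
   merges the even-byte and odd-byte results.  */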
49455 for (i = 0; i < nelt; i++)
49457 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49458 if (i & 1)
49460 rperm[i] = constm1_rtx;
49461 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49463 else
49465 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49466 rperm[i + 64] = constm1_rtx;
49470 bool ok = expand_vec_perm_1 (&ds[0]);
49471 gcc_assert (ok);
49472 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49474 ok = expand_vec_perm_1 (&ds[1]);
49475 gcc_assert (ok);
49476 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49478 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49479 vperm = force_reg (vmode, vperm);
49480 target0 = gen_reg_rtx (V64QImode);
49481 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49483 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49484 vperm = force_reg (vmode, vperm);
49485 target1 = gen_reg_rtx (V64QImode);
49486 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49488 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49489 return true;
49492 /* Implement an arbitrary permutation of two V32QImode or V16HImode operands
49493 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49494 all the shorter instruction sequences. */
49496 static bool
49497 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49499 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49500 unsigned int i, nelt, eltsz;
49501 bool used[4];
49503 if (!TARGET_AVX2
49504 || d->one_operand_p
49505 || (d->vmode != V32QImode && d->vmode != V16HImode))
49506 return false;
49508 if (d->testing_p)
49509 return true;
49511 nelt = d->nelt;
49512 eltsz = GET_MODE_UNIT_SIZE (d->vmode);
49514 /* Generate 4 permutation masks. If the required element is within
49515 the same lane, it is shuffled in. If the required element is from the
49516 other lane, force a zero by setting bit 7 in the permutation mask.
49517 The other mask has non-negative entries where the element is requested
49518 from the other lane; those entries are also moved to the other lane,
49519 so that the result of vpshufb has the two V2TImode halves
49520 swapped. */
49521 m128 = GEN_INT (-128);
49522 for (i = 0; i < 32; ++i)
49524 rperm[0][i] = m128;
49525 rperm[1][i] = m128;
49526 rperm[2][i] = m128;
49527 rperm[3][i] = m128;
49529 used[0] = false;
49530 used[1] = false;
49531 used[2] = false;
49532 used[3] = false;
49533 for (i = 0; i < nelt; ++i)
49535 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49536 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49537 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49539 for (j = 0; j < eltsz; ++j)
49540 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49541 used[which] = true;
49544 for (i = 0; i < 2; ++i)
49546 if (!used[2 * i + 1])
49548 h[i] = NULL_RTX;
49549 continue;
49551 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49552 gen_rtvec_v (32, rperm[2 * i + 1]));
49553 vperm = force_reg (V32QImode, vperm);
49554 h[i] = gen_reg_rtx (V32QImode);
49555 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49556 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49559 /* Swap the 128-bit lanes of h[X]. */
49560 for (i = 0; i < 2; ++i)
49562 if (h[i] == NULL_RTX)
49563 continue;
49564 op = gen_reg_rtx (V4DImode);
49565 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49566 const2_rtx, GEN_INT (3), const0_rtx,
49567 const1_rtx));
49568 h[i] = gen_lowpart (V32QImode, op);
49571 for (i = 0; i < 2; ++i)
49573 if (!used[2 * i])
49575 l[i] = NULL_RTX;
49576 continue;
49578 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49579 vperm = force_reg (V32QImode, vperm);
49580 l[i] = gen_reg_rtx (V32QImode);
49581 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49582 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49585 for (i = 0; i < 2; ++i)
49587 if (h[i] && l[i])
49589 op = gen_reg_rtx (V32QImode);
49590 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49591 l[i] = op;
49593 else if (h[i])
49594 l[i] = h[i];
49597 gcc_assert (l[0] && l[1]);
49598 op = d->target;
49599 if (d->vmode != V32QImode)
49600 op = gen_reg_rtx (V32QImode);
49601 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49602 if (op != d->target)
49603 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49604 return true;
49607 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49608 With all of the interface bits taken care of, perform the expansion
49609 in D and return true on success. */
49611 static bool
49612 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49614 /* Try a single instruction expansion. */
49615 if (expand_vec_perm_1 (d))
49616 return true;
49618 /* Try sequences of two instructions. */
49620 if (expand_vec_perm_pshuflw_pshufhw (d))
49621 return true;
49623 if (expand_vec_perm_palignr (d, false))
49624 return true;
49626 if (expand_vec_perm_interleave2 (d))
49627 return true;
49629 if (expand_vec_perm_broadcast (d))
49630 return true;
49632 if (expand_vec_perm_vpermq_perm_1 (d))
49633 return true;
49635 if (expand_vec_perm_vperm2f128 (d))
49636 return true;
49638 if (expand_vec_perm_pblendv (d))
49639 return true;
49641 /* Try sequences of three instructions. */
49643 if (expand_vec_perm_even_odd_pack (d))
49644 return true;
49646 if (expand_vec_perm_2vperm2f128_vshuf (d))
49647 return true;
49649 if (expand_vec_perm_pshufb2 (d))
49650 return true;
49652 if (expand_vec_perm_interleave3 (d))
49653 return true;
49655 if (expand_vec_perm_vperm2f128_vblend (d))
49656 return true;
49658 /* Try sequences of four instructions. */
49660 if (expand_vec_perm_vpshufb2_vpermq (d))
49661 return true;
49663 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49664 return true;
49666 if (expand_vec_perm_vpermi2_vpshub2 (d))
49667 return true;
49669 /* ??? Look for narrow permutations whose element orderings would
49670 allow the promotion to a wider mode. */
49672 /* ??? Look for sequences of interleave or a wider permute that place
49673 the data into the correct lanes for a half-vector shuffle like
49674 pshuf[lh]w or vpermilps. */
49676 /* ??? Look for sequences of interleave that produce the desired results.
49677 The combinatorics of punpck[lh] get pretty ugly... */
49679 if (expand_vec_perm_even_odd (d))
49680 return true;
49682 /* Even longer sequences. */
49683 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49684 return true;
49686 return false;
49689 /* If a permutation only uses one operand, make it clear. Returns true
49690 if the permutation references both operands. */
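/* Example (editorial note, not part of the original sources): for
   nelt == 4, a permutation {5 6 4 7} references only the second operand
   (which == 2), so it is folded to {1 2 0 3} with op0 = op1 and
   one_operand_p set, and the function returns false because both
   operands are not referenced.  */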
49692 static bool
49693 canonicalize_perm (struct expand_vec_perm_d *d)
49695 int i, which, nelt = d->nelt;
49697 for (i = which = 0; i < nelt; ++i)
49698 which |= (d->perm[i] < nelt ? 1 : 2);
49700 d->one_operand_p = true;
49701 switch (which)
49703 default:
49704 gcc_unreachable();
49706 case 3:
49707 if (!rtx_equal_p (d->op0, d->op1))
49709 d->one_operand_p = false;
49710 break;
49712 /* The elements of PERM do not suggest that only the first operand
49713 is used, but both operands are identical. Allow easier matching
49714 of the permutation by folding the permutation into the single
49715 input vector. */
49716 /* FALLTHRU */
49718 case 2:
49719 for (i = 0; i < nelt; ++i)
49720 d->perm[i] &= nelt - 1;
49721 d->op0 = d->op1;
49722 break;
49724 case 1:
49725 d->op1 = d->op0;
49726 break;
49729 return (which == 3);
49732 bool
49733 ix86_expand_vec_perm_const (rtx operands[4])
49735 struct expand_vec_perm_d d;
49736 unsigned char perm[MAX_VECT_LEN];
49737 int i, nelt;
49738 bool two_args;
49739 rtx sel;
49741 d.target = operands[0];
49742 d.op0 = operands[1];
49743 d.op1 = operands[2];
49744 sel = operands[3];
49746 d.vmode = GET_MODE (d.target);
49747 gcc_assert (VECTOR_MODE_P (d.vmode));
49748 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49749 d.testing_p = false;
49751 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49752 gcc_assert (XVECLEN (sel, 0) == nelt);
49753 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49755 for (i = 0; i < nelt; ++i)
49757 rtx e = XVECEXP (sel, 0, i);
49758 int ei = INTVAL (e) & (2 * nelt - 1);
49759 d.perm[i] = ei;
49760 perm[i] = ei;
49763 two_args = canonicalize_perm (&d);
49765 if (ix86_expand_vec_perm_const_1 (&d))
49766 return true;
49768 /* If the selector says both arguments are needed, but the operands are the
49769 same, the above tried to expand with one_operand_p and flattened selector.
49770 If that didn't work, retry without one_operand_p; we succeeded with that
49771 during testing. */
49772 if (two_args && d.one_operand_p)
49774 d.one_operand_p = false;
49775 memcpy (d.perm, perm, sizeof (perm));
49776 return ix86_expand_vec_perm_const_1 (&d);
49779 return false;
49782 /* Implement targetm.vectorize.vec_perm_const_ok. */
49784 static bool
49785 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49786 const unsigned char *sel)
49788 struct expand_vec_perm_d d;
49789 unsigned int i, nelt, which;
49790 bool ret;
49792 d.vmode = vmode;
49793 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49794 d.testing_p = true;
49796 /* Given sufficient ISA support we can just return true here
49797 for selected vector modes. */
49798 switch (d.vmode)
49800 case V16SFmode:
49801 case V16SImode:
49802 case V8DImode:
49803 case V8DFmode:
49804 if (TARGET_AVX512F)
49805 /* All implementable with a single vpermi2 insn. */
49806 return true;
49807 break;
49808 case V32HImode:
49809 if (TARGET_AVX512BW)
49810 /* All implementable with a single vpermi2 insn. */
49811 return true;
49812 break;
49813 case V64QImode:
49814 if (TARGET_AVX512BW)
49815 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49816 return true;
49817 break;
49818 case V8SImode:
49819 case V8SFmode:
49820 case V4DFmode:
49821 case V4DImode:
49822 if (TARGET_AVX512VL)
49823 /* All implementable with a single vpermi2 insn. */
49824 return true;
49825 break;
49826 case V16HImode:
49827 if (TARGET_AVX2)
49828 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49829 return true;
49830 break;
49831 case V32QImode:
49832 if (TARGET_AVX2)
49833 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49834 return true;
49835 break;
49836 case V4SImode:
49837 case V4SFmode:
49838 case V8HImode:
49839 case V16QImode:
49840 /* All implementable with a single vpperm insn. */
49841 if (TARGET_XOP)
49842 return true;
49843 /* All implementable with 2 pshufb + 1 ior. */
49844 if (TARGET_SSSE3)
49845 return true;
49846 break;
49847 case V2DImode:
49848 case V2DFmode:
49849 /* All implementable with shufpd or unpck[lh]pd. */
49850 return true;
49851 default:
49852 return false;
49855 /* Extract the values from the vector CST into the permutation
49856 array in D. */
49857 memcpy (d.perm, sel, nelt);
49858 for (i = which = 0; i < nelt; ++i)
49860 unsigned char e = d.perm[i];
49861 gcc_assert (e < 2 * nelt);
49862 which |= (e < nelt ? 1 : 2);
49865 /* If all elements come from the second vector, fold their indices onto the first. */
49866 if (which == 2)
49867 for (i = 0; i < nelt; ++i)
49868 d.perm[i] -= nelt;
49870 /* Check whether the mask can be applied to the vector type. */
49871 d.one_operand_p = (which != 3);
49873 /* Implementable with shufps or pshufd. */
49874 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49875 return true;
49877 /* Otherwise we have to go through the motions and see if we can
49878 figure out how to generate the requested permutation. */
49879 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49880 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49881 if (!d.one_operand_p)
49882 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49884 start_sequence ();
49885 ret = ix86_expand_vec_perm_const_1 (&d);
49886 end_sequence ();
49888 return ret;
49891 void
49892 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49894 struct expand_vec_perm_d d;
49895 unsigned i, nelt;
49897 d.target = targ;
49898 d.op0 = op0;
49899 d.op1 = op1;
49900 d.vmode = GET_MODE (targ);
49901 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49902 d.one_operand_p = false;
49903 d.testing_p = false;
49905 for (i = 0; i < nelt; ++i)
49906 d.perm[i] = i * 2 + odd;
49908 /* We'll either be able to implement the permutation directly... */
49909 if (expand_vec_perm_1 (&d))
49910 return;
49912 /* ... or we use the special-case patterns. */
49913 expand_vec_perm_even_odd_1 (&d, odd);
49916 static void
49917 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49919 struct expand_vec_perm_d d;
49920 unsigned i, nelt, base;
49921 bool ok;
49923 d.target = targ;
49924 d.op0 = op0;
49925 d.op1 = op1;
49926 d.vmode = GET_MODE (targ);
49927 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49928 d.one_operand_p = false;
49929 d.testing_p = false;
49931 base = high_p ? nelt / 2 : 0;
49932 for (i = 0; i < nelt / 2; ++i)
49934 d.perm[i * 2] = i + base;
49935 d.perm[i * 2 + 1] = i + base + nelt;
49938 /* Note that for AVX this isn't one instruction. */
49939 ok = ix86_expand_vec_perm_const_1 (&d);
49940 gcc_assert (ok);
49944 /* Expand a vector operation CODE for a V*QImode in terms of the
49945 same operation on V*HImode. */
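/* Sketch of the V16QImode multiply case handled below (editorial note,
   not part of the original sources): each operand is interleaved with
   itself (punpcklbw/punpckhbw), so every 16-bit word holds a source
   byte in its low half; two V8HImode multiplies then produce words
   whose low bytes are the desired products, and the final permutation
   {0 2 4 ... 30} over the concatenated low/high results gathers those
   even bytes back into a V16QImode vector.  */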
49947 void
49948 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49950 machine_mode qimode = GET_MODE (dest);
49951 machine_mode himode;
49952 rtx (*gen_il) (rtx, rtx, rtx);
49953 rtx (*gen_ih) (rtx, rtx, rtx);
49954 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49955 struct expand_vec_perm_d d;
49956 bool ok, full_interleave;
49957 bool uns_p = false;
49958 int i;
49960 switch (qimode)
49962 case V16QImode:
49963 himode = V8HImode;
49964 gen_il = gen_vec_interleave_lowv16qi;
49965 gen_ih = gen_vec_interleave_highv16qi;
49966 break;
49967 case V32QImode:
49968 himode = V16HImode;
49969 gen_il = gen_avx2_interleave_lowv32qi;
49970 gen_ih = gen_avx2_interleave_highv32qi;
49971 break;
49972 case V64QImode:
49973 himode = V32HImode;
49974 gen_il = gen_avx512bw_interleave_lowv64qi;
49975 gen_ih = gen_avx512bw_interleave_highv64qi;
49976 break;
49977 default:
49978 gcc_unreachable ();
49981 op2_l = op2_h = op2;
49982 switch (code)
49984 case MULT:
49985 /* Unpack data such that we've got a source byte in each low byte of
49986 each word. We don't care what goes into the high byte of each word.
49987 Rather than trying to get zero in there, it is most convenient to let
49988 it be a copy of the low byte. */
49989 op2_l = gen_reg_rtx (qimode);
49990 op2_h = gen_reg_rtx (qimode);
49991 emit_insn (gen_il (op2_l, op2, op2));
49992 emit_insn (gen_ih (op2_h, op2, op2));
49993 /* FALLTHRU */
49995 op1_l = gen_reg_rtx (qimode);
49996 op1_h = gen_reg_rtx (qimode);
49997 emit_insn (gen_il (op1_l, op1, op1));
49998 emit_insn (gen_ih (op1_h, op1, op1));
49999 full_interleave = qimode == V16QImode;
50000 break;
50002 case ASHIFT:
50003 case LSHIFTRT:
50004 uns_p = true;
50005 /* FALLTHRU */
50006 case ASHIFTRT:
50007 op1_l = gen_reg_rtx (himode);
50008 op1_h = gen_reg_rtx (himode);
50009 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
50010 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
50011 full_interleave = true;
50012 break;
50013 default:
50014 gcc_unreachable ();
50017 /* Perform the operation. */
50018 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
50019 1, OPTAB_DIRECT);
50020 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
50021 1, OPTAB_DIRECT);
50022 gcc_assert (res_l && res_h);
50024 /* Merge the data back into the right place. */
50025 d.target = dest;
50026 d.op0 = gen_lowpart (qimode, res_l);
50027 d.op1 = gen_lowpart (qimode, res_h);
50028 d.vmode = qimode;
50029 d.nelt = GET_MODE_NUNITS (qimode);
50030 d.one_operand_p = false;
50031 d.testing_p = false;
50033 if (full_interleave)
50035 /* For SSE2, we used a full interleave, so the desired
50036 results are in the even elements. */
50037 for (i = 0; i < 64; ++i)
50038 d.perm[i] = i * 2;
50040 else
50042 /* For AVX, the interleave used above was not cross-lane. So the
50043 extraction is evens but with the second and third quarter swapped.
50044 Happily, that is actually one insn shorter than plain even extraction. */
50045 for (i = 0; i < 64; ++i)
50046 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
50049 ok = ix86_expand_vec_perm_const_1 (&d);
50050 gcc_assert (ok);
50052 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50053 gen_rtx_fmt_ee (code, qimode, op1, op2));
50056 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
50057 if op is CONST_VECTOR with all odd elements equal to their
50058 preceding element. */
50060 static bool
50061 const_vector_equal_evenodd_p (rtx op)
50063 machine_mode mode = GET_MODE (op);
50064 int i, nunits = GET_MODE_NUNITS (mode);
50065 if (GET_CODE (op) != CONST_VECTOR
50066 || nunits != CONST_VECTOR_NUNITS (op))
50067 return false;
50068 for (i = 0; i < nunits; i += 2)
50069 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
50070 return false;
50071 return true;
50074 void
50075 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
50076 bool uns_p, bool odd_p)
50078 machine_mode mode = GET_MODE (op1);
50079 machine_mode wmode = GET_MODE (dest);
50080 rtx x;
50081 rtx orig_op1 = op1, orig_op2 = op2;
50083 if (!nonimmediate_operand (op1, mode))
50084 op1 = force_reg (mode, op1);
50085 if (!nonimmediate_operand (op2, mode))
50086 op2 = force_reg (mode, op2);
50088 /* We only play even/odd games with vectors of SImode. */
50089 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
50091 /* If we're looking for the odd results, shift those members down to
50092 the even slots. For some cpus this is faster than a PSHUFD. */
50093 if (odd_p)
50095 /* For XOP use vpmacsdqh, but only for smult, as it is only
50096 signed. */
50097 if (TARGET_XOP && mode == V4SImode && !uns_p)
50099 x = force_reg (wmode, CONST0_RTX (wmode));
50100 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
50101 return;
50104 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
50105 if (!const_vector_equal_evenodd_p (orig_op1))
50106 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
50107 x, NULL, 1, OPTAB_DIRECT);
50108 if (!const_vector_equal_evenodd_p (orig_op2))
50109 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
50110 x, NULL, 1, OPTAB_DIRECT);
50111 op1 = gen_lowpart (mode, op1);
50112 op2 = gen_lowpart (mode, op2);
50115 if (mode == V16SImode)
50117 if (uns_p)
50118 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
50119 else
50120 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
50122 else if (mode == V8SImode)
50124 if (uns_p)
50125 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
50126 else
50127 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
50129 else if (uns_p)
50130 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
50131 else if (TARGET_SSE4_1)
50132 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
50133 else
50135 rtx s1, s2, t0, t1, t2;
50137 /* The easiest way to implement this without PMULDQ is to go through
50138 the motions as if we are performing a full 64-bit multiply, except
50139 that we need to do less shuffling of the elements. */
50141 /* Compute the sign-extension, aka highparts, of the two operands. */
50142 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50143 op1, pc_rtx, pc_rtx);
50144 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50145 op2, pc_rtx, pc_rtx);
50147 /* Multiply LO(A) * HI(B), and vice-versa. */
50148 t1 = gen_reg_rtx (wmode);
50149 t2 = gen_reg_rtx (wmode);
50150 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
50151 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
50153 /* Multiply LO(A) * LO(B). */
50154 t0 = gen_reg_rtx (wmode);
50155 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
50157 /* Combine and shift the highparts into place. */
50158 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
50159 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
50160 1, OPTAB_DIRECT);
50162 /* Combine high and low parts. */
50163 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
50164 return;
50166 emit_insn (x);
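/* Illustrative sketch only (not compiler code): ignoring target insn
   selection, the even/odd widening multiply above computes, for V4SImode
   operands and a V2DImode destination,
     dest[j] = (DImode) op1[2*j + odd] * (DImode) op2[2*j + odd].
   The helper below models the unsigned case in plain C; the name and the
   fixed element counts are assumptions made for this example.  */
static void
model_mul_widen_evenodd_v4si (unsigned long long dest[2],
                              const unsigned int op1[4],
                              const unsigned int op2[4],
                              int odd_p)
{
  for (int j = 0; j < 2; j++)
    dest[j] = (unsigned long long) op1[2 * j + odd_p]
              * (unsigned long long) op2[2 * j + odd_p];
}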
50169 void
50170 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
50171 bool uns_p, bool high_p)
50173 machine_mode wmode = GET_MODE (dest);
50174 machine_mode mode = GET_MODE (op1);
50175 rtx t1, t2, t3, t4, mask;
50177 switch (mode)
50179 case V4SImode:
50180 t1 = gen_reg_rtx (mode);
50181 t2 = gen_reg_rtx (mode);
50182 if (TARGET_XOP && !uns_p)
50184 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
50185 shuffle the elements once so that all elements are in the right
50186 place for immediate use: { A C B D }. */
50187 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
50188 const1_rtx, GEN_INT (3)));
50189 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
50190 const1_rtx, GEN_INT (3)));
50192 else
50194 /* Put the elements into place for the multiply. */
50195 ix86_expand_vec_interleave (t1, op1, op1, high_p);
50196 ix86_expand_vec_interleave (t2, op2, op2, high_p);
50197 high_p = false;
50199 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
50200 break;
50202 case V8SImode:
50203 /* Shuffle the elements between the lanes. After this we
50204 have { A B E F | C D G H } for each operand. */
50205 t1 = gen_reg_rtx (V4DImode);
50206 t2 = gen_reg_rtx (V4DImode);
50207 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
50208 const0_rtx, const2_rtx,
50209 const1_rtx, GEN_INT (3)));
50210 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
50211 const0_rtx, const2_rtx,
50212 const1_rtx, GEN_INT (3)));
50214 /* Shuffle the elements within the lanes. After this we
50215 have { A A B B | C C D D } or { E E F F | G G H H }. */
50216 t3 = gen_reg_rtx (V8SImode);
50217 t4 = gen_reg_rtx (V8SImode);
50218 mask = GEN_INT (high_p
50219 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
50220 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
50221 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
50222 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
50224 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
50225 break;
50227 case V8HImode:
50228 case V16HImode:
50229 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
50230 uns_p, OPTAB_DIRECT);
50231 t2 = expand_binop (mode,
50232 uns_p ? umul_highpart_optab : smul_highpart_optab,
50233 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
50234 gcc_assert (t1 && t2);
50236 t3 = gen_reg_rtx (mode);
50237 ix86_expand_vec_interleave (t3, t1, t2, high_p);
50238 emit_move_insn (dest, gen_lowpart (wmode, t3));
50239 break;
50241 case V16QImode:
50242 case V32QImode:
50243 case V32HImode:
50244 case V16SImode:
50245 case V64QImode:
50246 t1 = gen_reg_rtx (wmode);
50247 t2 = gen_reg_rtx (wmode);
50248 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
50249 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
50251 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
50252 break;
50254 default:
50255 gcc_unreachable ();
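/* Illustrative sketch only (not compiler code): the hi/lo widening multiply
   produces the full-width products of either the low or the high half of the
   input elements.  The plain-C model below assumes the unsigned V4SImode
   case with a V2DImode destination; the name and element counts are
   assumptions made for this example.  */
static void
model_mul_widen_hilo_v4si (unsigned long long dest[2],
                           const unsigned int op1[4],
                           const unsigned int op2[4],
                           int high_p)
{
  int base = high_p ? 2 : 0;
  for (int j = 0; j < 2; j++)
    dest[j] = (unsigned long long) op1[base + j]
              * (unsigned long long) op2[base + j];
}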
50259 void
50260 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
50262 rtx res_1, res_2, res_3, res_4;
50264 res_1 = gen_reg_rtx (V4SImode);
50265 res_2 = gen_reg_rtx (V4SImode);
50266 res_3 = gen_reg_rtx (V2DImode);
50267 res_4 = gen_reg_rtx (V2DImode);
50268 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
50269 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
50271 /* Move the results in element 2 down to element 1; we don't care
50272 what goes in elements 2 and 3. Then we can merge the parts
50273 back together with an interleave.
50275 Note that two other sequences were tried:
50276 (1) Use interleaves at the start instead of psrldq, which allows
50277 us to use a single shufps to merge things back at the end.
50278 (2) Use shufps here to combine the two vectors, then pshufd to
50279 put the elements in the correct order.
50280 In both cases the cost of the reformatting stall was too high
50281 and the overall sequence slower. */
50283 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
50284 const0_rtx, const2_rtx,
50285 const0_rtx, const0_rtx));
50286 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
50287 const0_rtx, const2_rtx,
50288 const0_rtx, const0_rtx));
50289 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
50291 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
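/* Illustrative sketch only (not compiler code): the expansion above
   implements an element-wise V4SImode multiply, i.e. each 32-bit lane of the
   result is the low 32 bits of the full product of the corresponding lanes,
   obtained here from two even/odd widening multiplies plus a shuffle and an
   interleave.  A plain-C model of the end result (the helper name is an
   assumption made for this example):  */
static void
model_mulv4si3 (unsigned int dest[4], const unsigned int op1[4],
                const unsigned int op2[4])
{
  for (int i = 0; i < 4; i++)
    dest[i] = (unsigned int) ((unsigned long long) op1[i] * op2[i]);
}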
50294 void
50295 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
50297 machine_mode mode = GET_MODE (op0);
50298 rtx t1, t2, t3, t4, t5, t6;
50300 if (TARGET_AVX512DQ && mode == V8DImode)
50301 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
50302 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
50303 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
50304 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
50305 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
50306 else if (TARGET_XOP && mode == V2DImode)
50308 /* op1: A,B,C,D, op2: E,F,G,H */
50309 op1 = gen_lowpart (V4SImode, op1);
50310 op2 = gen_lowpart (V4SImode, op2);
50312 t1 = gen_reg_rtx (V4SImode);
50313 t2 = gen_reg_rtx (V4SImode);
50314 t3 = gen_reg_rtx (V2DImode);
50315 t4 = gen_reg_rtx (V2DImode);
50317 /* t1: B,A,D,C */
50318 emit_insn (gen_sse2_pshufd_1 (t1, op1,
50319 GEN_INT (1),
50320 GEN_INT (0),
50321 GEN_INT (3),
50322 GEN_INT (2)));
50324 /* t2: (B*E),(A*F),(D*G),(C*H) */
50325 emit_insn (gen_mulv4si3 (t2, t1, op2));
50327 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50328 emit_insn (gen_xop_phadddq (t3, t2));
50330 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50331 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50333 /* Multiply lower parts and add all */
50334 t5 = gen_reg_rtx (V2DImode);
50335 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50336 gen_lowpart (V4SImode, op1),
50337 gen_lowpart (V4SImode, op2)));
50338 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
50341 else
50343 machine_mode nmode;
50344 rtx (*umul) (rtx, rtx, rtx);
50346 if (mode == V2DImode)
50348 umul = gen_vec_widen_umult_even_v4si;
50349 nmode = V4SImode;
50351 else if (mode == V4DImode)
50353 umul = gen_vec_widen_umult_even_v8si;
50354 nmode = V8SImode;
50356 else if (mode == V8DImode)
50358 umul = gen_vec_widen_umult_even_v16si;
50359 nmode = V16SImode;
50361 else
50362 gcc_unreachable ();
50365 /* Multiply low parts. */
50366 t1 = gen_reg_rtx (mode);
50367 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50369 /* Shift input vectors right 32 bits so we can multiply high parts. */
50370 t6 = GEN_INT (32);
50371 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50372 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50374 /* Multiply high parts by low parts. */
50375 t4 = gen_reg_rtx (mode);
50376 t5 = gen_reg_rtx (mode);
50377 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50378 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50380 /* Combine and shift the highparts back. */
50381 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50382 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50384 /* Combine high and low parts. */
50385 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50388 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50389 gen_rtx_MULT (mode, op1, op2));
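/* Illustrative sketch only (not compiler code): the generic path above
   computes the low 64 bits of each 64x64 product from 32x32->64 multiplies,
   using
     lo64 (a * b) = a_lo * b_lo + ((a_lo * b_hi + a_hi * b_lo) << 32)
   with all arithmetic modulo 2^64.  A plain-C model for one element (the
   helper name is an assumption made for this example):  */
static unsigned long long
model_mul_lo64_via_32x32 (unsigned long long a, unsigned long long b)
{
  unsigned long long a_lo = a & 0xffffffffULL, a_hi = a >> 32;
  unsigned long long b_lo = b & 0xffffffffULL, b_hi = b >> 32;
  unsigned long long cross = a_lo * b_hi + a_hi * b_lo;  /* taken mod 2^64 */
  return a_lo * b_lo + (cross << 32);
}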
50392 /* Return 1 if control transfer instruction INSN
50393 should be encoded with bnd prefix.
50394 If insn is NULL then return 1 when control
50395 transfer instructions should be prefixed with
50396 bnd by default for the current function. */
50398 bool
50399 ix86_bnd_prefixed_insn_p (rtx insn)
50401 /* For call insns check special flag. */
50402 if (insn && CALL_P (insn))
50404 rtx call = get_call_rtx_from (insn);
50405 if (call)
50406 return CALL_EXPR_WITH_BOUNDS_P (call);
50409 /* All other insns are prefixed only if function is instrumented. */
50410 return chkp_function_instrumented_p (current_function_decl);
50413 /* Calculate integer abs() using only SSE2 instructions. */
50415 void
50416 ix86_expand_sse2_abs (rtx target, rtx input)
50418 machine_mode mode = GET_MODE (target);
50419 rtx tmp0, tmp1, x;
50421 switch (mode)
50423 /* For 32-bit signed integer X, the best way to calculate the absolute
50424 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
50425 case V4SImode:
50426 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50427 GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
50428 NULL, 0, OPTAB_DIRECT);
50429 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50430 NULL, 0, OPTAB_DIRECT);
50431 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50432 target, 0, OPTAB_DIRECT);
50433 break;
50435 /* For 16-bit signed integer X, the best way to calculate the absolute
50436 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50437 case V8HImode:
50438 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50440 x = expand_simple_binop (mode, SMAX, tmp0, input,
50441 target, 0, OPTAB_DIRECT);
50442 break;
50444 /* For 8-bit signed integer X, the best way to calculate the absolute
50445 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50446 as SSE2 provides the PMINUB insn. */
50447 case V16QImode:
50448 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50450 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50451 target, 0, OPTAB_DIRECT);
50452 break;
50454 default:
50455 gcc_unreachable ();
50458 if (x != target)
50459 emit_move_insn (target, x);
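/* Illustrative sketch only (not compiler code): plain-C models of the three
   per-element absolute-value idioms used above.  The helper names are
   assumptions made for this example.  */
static int
model_abs_si_shift_xor_sub (int x)
{
  int sign = x >> 31;            /* arithmetic shift: 0 or -1 */
  return (x ^ sign) - sign;      /* V4SImode path */
}

static short
model_abs_hi_smax (short x)
{
  short neg = (short) -x;
  return x > neg ? x : neg;      /* V8HImode path: max (X, -X) */
}

static unsigned char
model_abs_qi_umin (signed char x)
{
  unsigned char ux = (unsigned char) x, unx = (unsigned char) -x;
  return ux < unx ? ux : unx;    /* V16QImode path: min as unsigned */
}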
50462 /* Expand an extract from a vector register through pextr insn.
50463 Return true if successful. */
50465 bool
50466 ix86_expand_pextr (rtx *operands)
50468 rtx dst = operands[0];
50469 rtx src = operands[1];
50471 unsigned int size = INTVAL (operands[2]);
50472 unsigned int pos = INTVAL (operands[3]);
50474 if (SUBREG_P (dst))
50476 /* Reject non-lowpart subregs. */
50477 if (SUBREG_BYTE (dst) > 0)
50478 return false;
50479 dst = SUBREG_REG (dst);
50482 if (SUBREG_P (src))
50484 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
50485 src = SUBREG_REG (src);
50488 switch (GET_MODE (src))
50490 case V16QImode:
50491 case V8HImode:
50492 case V4SImode:
50493 case V2DImode:
50494 case V1TImode:
50495 case TImode:
50497 machine_mode srcmode, dstmode;
50498 rtx d, pat;
50500 dstmode = mode_for_size (size, MODE_INT, 0);
50502 switch (dstmode)
50504 case QImode:
50505 if (!TARGET_SSE4_1)
50506 return false;
50507 srcmode = V16QImode;
50508 break;
50510 case HImode:
50511 if (!TARGET_SSE2)
50512 return false;
50513 srcmode = V8HImode;
50514 break;
50516 case SImode:
50517 if (!TARGET_SSE4_1)
50518 return false;
50519 srcmode = V4SImode;
50520 break;
50522 case DImode:
50523 gcc_assert (TARGET_64BIT);
50524 if (!TARGET_SSE4_1)
50525 return false;
50526 srcmode = V2DImode;
50527 break;
50529 default:
50530 return false;
50533 /* Reject extractions from misaligned positions. */
50534 if (pos & (size-1))
50535 return false;
50537 if (GET_MODE (dst) == dstmode)
50538 d = dst;
50539 else
50540 d = gen_reg_rtx (dstmode);
50542 /* Construct insn pattern. */
50543 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
50544 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
50546 /* Let the rtl optimizers know about the zero extension performed. */
50547 if (dstmode == QImode || dstmode == HImode)
50549 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
50550 d = gen_lowpart (SImode, d);
50553 emit_insn (gen_rtx_SET (d, pat));
50555 if (d != dst)
50556 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50557 return true;
50560 default:
50561 return false;
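/* Illustrative sketch only (not compiler code): viewed as a bitfield
   operation, the pextr expansion above extracts the SIZE-bit element at the
   aligned bit position POS (element number POS / SIZE) and zero-extends it.
   A plain-C model for a 64-bit source word; the name and the 64-bit
   container are assumptions made for this example.  */
static unsigned long long
model_pextr_bits (unsigned long long src, unsigned int size, unsigned int pos)
{
  unsigned long long mask = size >= 64 ? ~0ULL : (1ULL << size) - 1;
  return (src >> pos) & mask;
}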
50565 /* Expand an insert into a vector register through pinsr insn.
50566 Return true if successful. */
50568 bool
50569 ix86_expand_pinsr (rtx *operands)
50571 rtx dst = operands[0];
50572 rtx src = operands[3];
50574 unsigned int size = INTVAL (operands[1]);
50575 unsigned int pos = INTVAL (operands[2]);
50577 if (SUBREG_P (dst))
50579 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50580 dst = SUBREG_REG (dst);
50583 switch (GET_MODE (dst))
50585 case V16QImode:
50586 case V8HImode:
50587 case V4SImode:
50588 case V2DImode:
50589 case V1TImode:
50590 case TImode:
50592 machine_mode srcmode, dstmode;
50593 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50594 rtx d;
50596 srcmode = mode_for_size (size, MODE_INT, 0);
50598 switch (srcmode)
50600 case QImode:
50601 if (!TARGET_SSE4_1)
50602 return false;
50603 dstmode = V16QImode;
50604 pinsr = gen_sse4_1_pinsrb;
50605 break;
50607 case HImode:
50608 if (!TARGET_SSE2)
50609 return false;
50610 dstmode = V8HImode;
50611 pinsr = gen_sse2_pinsrw;
50612 break;
50614 case SImode:
50615 if (!TARGET_SSE4_1)
50616 return false;
50617 dstmode = V4SImode;
50618 pinsr = gen_sse4_1_pinsrd;
50619 break;
50621 case DImode:
50622 gcc_assert (TARGET_64BIT);
50623 if (!TARGET_SSE4_1)
50624 return false;
50625 dstmode = V2DImode;
50626 pinsr = gen_sse4_1_pinsrq;
50627 break;
50629 default:
50630 return false;
50633 /* Reject insertions to misaligned positions. */
50634 if (pos & (size-1))
50635 return false;
50637 if (SUBREG_P (src))
50639 unsigned int srcpos = SUBREG_BYTE (src);
50641 if (srcpos > 0)
50643 rtx extr_ops[4];
50645 extr_ops[0] = gen_reg_rtx (srcmode);
50646 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
50647 extr_ops[2] = GEN_INT (size);
50648 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
50650 if (!ix86_expand_pextr (extr_ops))
50651 return false;
50653 src = extr_ops[0];
50655 else
50656 src = gen_lowpart (srcmode, SUBREG_REG (src));
50659 if (GET_MODE (dst) == dstmode)
50660 d = dst;
50661 else
50662 d = gen_reg_rtx (dstmode);
50664 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
50665 gen_lowpart (srcmode, src),
50666 GEN_INT (1 << (pos / size))));
50667 if (d != dst)
50668 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50669 return true;
50672 default:
50673 return false;
50677 /* This function returns the calling-ABI-specific va_list type node.
50678 It returns the FNDECL-specific va_list type. */
50680 static tree
50681 ix86_fn_abi_va_list (tree fndecl)
50683 if (!TARGET_64BIT)
50684 return va_list_type_node;
50685 gcc_assert (fndecl != NULL_TREE);
50687 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50688 return ms_va_list_type_node;
50689 else
50690 return sysv_va_list_type_node;
50693 /* Returns the canonical va_list type specified by TYPE. If there
50694 is no valid TYPE provided, it returns NULL_TREE. */
50696 static tree
50697 ix86_canonical_va_list_type (tree type)
50699 tree wtype, htype;
50701 /* Resolve references and pointers to va_list type. */
50702 if (TREE_CODE (type) == MEM_REF)
50703 type = TREE_TYPE (type);
50704 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50705 type = TREE_TYPE (type);
50706 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50707 type = TREE_TYPE (type);
50709 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50711 wtype = va_list_type_node;
50712 gcc_assert (wtype != NULL_TREE);
50713 htype = type;
50714 if (TREE_CODE (wtype) == ARRAY_TYPE)
50716 /* If va_list is an array type, the argument may have decayed
50717 to a pointer type, e.g. by being passed to another function.
50718 In that case, unwrap both types so that we can compare the
50719 underlying records. */
50720 if (TREE_CODE (htype) == ARRAY_TYPE
50721 || POINTER_TYPE_P (htype))
50723 wtype = TREE_TYPE (wtype);
50724 htype = TREE_TYPE (htype);
50727 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50728 return va_list_type_node;
50729 wtype = sysv_va_list_type_node;
50730 gcc_assert (wtype != NULL_TREE);
50731 htype = type;
50732 if (TREE_CODE (wtype) == ARRAY_TYPE)
50734 /* If va_list is an array type, the argument may have decayed
50735 to a pointer type, e.g. by being passed to another function.
50736 In that case, unwrap both types so that we can compare the
50737 underlying records. */
50738 if (TREE_CODE (htype) == ARRAY_TYPE
50739 || POINTER_TYPE_P (htype))
50741 wtype = TREE_TYPE (wtype);
50742 htype = TREE_TYPE (htype);
50745 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50746 return sysv_va_list_type_node;
50747 wtype = ms_va_list_type_node;
50748 gcc_assert (wtype != NULL_TREE);
50749 htype = type;
50750 if (TREE_CODE (wtype) == ARRAY_TYPE)
50752 /* If va_list is an array type, the argument may have decayed
50753 to a pointer type, e.g. by being passed to another function.
50754 In that case, unwrap both types so that we can compare the
50755 underlying records. */
50756 if (TREE_CODE (htype) == ARRAY_TYPE
50757 || POINTER_TYPE_P (htype))
50759 wtype = TREE_TYPE (wtype);
50760 htype = TREE_TYPE (htype);
50763 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50764 return ms_va_list_type_node;
50765 return NULL_TREE;
50767 return std_canonical_va_list_type (type);
50770 /* Iterate through the target-specific builtin types for va_list.
50771 IDX denotes the iterator, *PTREE is set to the result type of
50772 the va_list builtin, and *PNAME to its internal type.
50773 Returns zero if there is no element for this index, otherwise
50774 IDX should be increased upon the next call.
50775 Note, do not iterate a base builtin's name like __builtin_va_list.
50776 Used from c_common_nodes_and_builtins. */
50778 static int
50779 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50781 if (TARGET_64BIT)
50783 switch (idx)
50785 default:
50786 break;
50788 case 0:
50789 *ptree = ms_va_list_type_node;
50790 *pname = "__builtin_ms_va_list";
50791 return 1;
50793 case 1:
50794 *ptree = sysv_va_list_type_node;
50795 *pname = "__builtin_sysv_va_list";
50796 return 1;
50800 return 0;
50803 #undef TARGET_SCHED_DISPATCH
50804 #define TARGET_SCHED_DISPATCH has_dispatch
50805 #undef TARGET_SCHED_DISPATCH_DO
50806 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50807 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50808 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50809 #undef TARGET_SCHED_REORDER
50810 #define TARGET_SCHED_REORDER ix86_sched_reorder
50811 #undef TARGET_SCHED_ADJUST_PRIORITY
50812 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50813 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50814 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50815 ix86_dependencies_evaluation_hook
50817 /* The size of the dispatch window is the total number of bytes of
50818 object code allowed in a window. */
50819 #define DISPATCH_WINDOW_SIZE 16
50821 /* Number of dispatch windows considered for scheduling. */
50822 #define MAX_DISPATCH_WINDOWS 3
50824 /* Maximum number of instructions in a window. */
50825 #define MAX_INSN 4
50827 /* Maximum number of immediate operands in a window. */
50828 #define MAX_IMM 4
50830 /* Maximum number of immediate bits allowed in a window. */
50831 #define MAX_IMM_SIZE 128
50833 /* Maximum number of 32 bit immediates allowed in a window. */
50834 #define MAX_IMM_32 4
50836 /* Maximum number of 64 bit immediates allowed in a window. */
50837 #define MAX_IMM_64 2
50839 /* Maximum total of loads or prefetches allowed in a window. */
50840 #define MAX_LOAD 2
50842 /* Maximum total of stores allowed in a window. */
50843 #define MAX_STORE 1
50845 #undef BIG
50846 #define BIG 100
50849 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50850 enum dispatch_group {
50851 disp_no_group = 0,
50852 disp_load,
50853 disp_store,
50854 disp_load_store,
50855 disp_prefetch,
50856 disp_imm,
50857 disp_imm_32,
50858 disp_imm_64,
50859 disp_branch,
50860 disp_cmp,
50861 disp_jcc,
50862 disp_last
50865 /* Number of allowable groups in a dispatch window. It is an array
50866 indexed by dispatch_group enum. 100 is used as a big number,
50867 because the number of these kinds of operations does not have any
50868 effect in a dispatch window, but we need them for other reasons in
50869 the table. */
50870 static unsigned int num_allowable_groups[disp_last] = {
50871 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50874 char group_name[disp_last + 1][16] = {
50875 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50876 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50877 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50880 /* Instruction path. */
50881 enum insn_path {
50882 no_path = 0,
50883 path_single, /* Single micro op. */
50884 path_double, /* Double micro op. */
50885 path_multi, /* Instructions with more than 2 micro ops. */
50886 last_path
50889 /* sched_insn_info defines a window to the instructions scheduled in
50890 the basic block. It contains a pointer to the insn_info table and
50891 the instruction scheduled.
50893 Windows are allocated for each basic block and are linked
50894 together. */
50895 typedef struct sched_insn_info_s {
50896 rtx insn;
50897 enum dispatch_group group;
50898 enum insn_path path;
50899 int byte_len;
50900 int imm_bytes;
50901 } sched_insn_info;
50903 /* Linked list of dispatch windows. This is a two-way list of
50904 dispatch windows of a basic block. It contains information about
50905 the number of uops in the window and the total number of
50906 instructions and of bytes in the object code for this dispatch
50907 window. */
50908 typedef struct dispatch_windows_s {
50909 int num_insn; /* Number of insn in the window. */
50910 int num_uops; /* Number of uops in the window. */
50911 int window_size; /* Number of bytes in the window. */
50912 int window_num; /* Window number, either 0 or 1. */
50913 int num_imm; /* Number of immediates in an insn. */
50914 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50915 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50916 int imm_size; /* Total immediates in the window. */
50917 int num_loads; /* Total memory loads in the window. */
50918 int num_stores; /* Total memory stores in the window. */
50919 int violation; /* Violation exists in window. */
50920 sched_insn_info *window; /* Pointer to the window. */
50921 struct dispatch_windows_s *next;
50922 struct dispatch_windows_s *prev;
50923 } dispatch_windows;
50925 /* Immediate values used in an insn. */
50926 typedef struct imm_info_s
50928 int imm;
50929 int imm32;
50930 int imm64;
50931 } imm_info;
50933 static dispatch_windows *dispatch_window_list;
50934 static dispatch_windows *dispatch_window_list1;
50936 /* Get dispatch group of insn. */
50938 static enum dispatch_group
50939 get_mem_group (rtx_insn *insn)
50941 enum attr_memory memory;
50943 if (INSN_CODE (insn) < 0)
50944 return disp_no_group;
50945 memory = get_attr_memory (insn);
50946 if (memory == MEMORY_STORE)
50947 return disp_store;
50949 if (memory == MEMORY_LOAD)
50950 return disp_load;
50952 if (memory == MEMORY_BOTH)
50953 return disp_load_store;
50955 return disp_no_group;
50958 /* Return true if insn is a compare instruction. */
50960 static bool
50961 is_cmp (rtx_insn *insn)
50963 enum attr_type type;
50965 type = get_attr_type (insn);
50966 return (type == TYPE_TEST
50967 || type == TYPE_ICMP
50968 || type == TYPE_FCMP
50969 || GET_CODE (PATTERN (insn)) == COMPARE);
50972 /* Return true if a dispatch violation was encountered. */
50974 static bool
50975 dispatch_violation (void)
50977 if (dispatch_window_list->next)
50978 return dispatch_window_list->next->violation;
50979 return dispatch_window_list->violation;
50982 /* Return true if insn is a branch instruction. */
50984 static bool
50985 is_branch (rtx_insn *insn)
50987 return (CALL_P (insn) || JUMP_P (insn));
50990 /* Return true if insn is a prefetch instruction. */
50992 static bool
50993 is_prefetch (rtx_insn *insn)
50995 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50998 /* This function initializes a dispatch window and the list container holding a
50999 pointer to the window. */
51001 static void
51002 init_window (int window_num)
51004 int i;
51005 dispatch_windows *new_list;
51007 if (window_num == 0)
51008 new_list = dispatch_window_list;
51009 else
51010 new_list = dispatch_window_list1;
51012 new_list->num_insn = 0;
51013 new_list->num_uops = 0;
51014 new_list->window_size = 0;
51015 new_list->next = NULL;
51016 new_list->prev = NULL;
51017 new_list->window_num = window_num;
51018 new_list->num_imm = 0;
51019 new_list->num_imm_32 = 0;
51020 new_list->num_imm_64 = 0;
51021 new_list->imm_size = 0;
51022 new_list->num_loads = 0;
51023 new_list->num_stores = 0;
51024 new_list->violation = false;
51026 for (i = 0; i < MAX_INSN; i++)
51028 new_list->window[i].insn = NULL;
51029 new_list->window[i].group = disp_no_group;
51030 new_list->window[i].path = no_path;
51031 new_list->window[i].byte_len = 0;
51032 new_list->window[i].imm_bytes = 0;
51034 return;
51037 /* This function allocates and initializes a dispatch window and the
51038 list container holding a pointer to the window. */
51040 static dispatch_windows *
51041 allocate_window (void)
51043 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
51044 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
51046 return new_list;
51049 /* This routine initializes the dispatch scheduling information. It
51050 initiates building dispatch scheduler tables and constructs the
51051 first dispatch window. */
51053 static void
51054 init_dispatch_sched (void)
51056 /* Allocate a dispatch list and a window. */
51057 dispatch_window_list = allocate_window ();
51058 dispatch_window_list1 = allocate_window ();
51059 init_window (0);
51060 init_window (1);
51063 /* This function returns true if a branch is detected. End of a basic block
51064 does not have to be a branch, but here we assume only branches end a
51065 window. */
51067 static bool
51068 is_end_basic_block (enum dispatch_group group)
51070 return group == disp_branch;
51073 /* This function is called when the end of a window processing is reached. */
51075 static void
51076 process_end_window (void)
51078 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
51079 if (dispatch_window_list->next)
51081 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
51082 gcc_assert (dispatch_window_list->window_size
51083 + dispatch_window_list1->window_size <= 48);
51084 init_window (1);
51086 init_window (0);
51089 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
51090 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
51091 for 48 bytes of instructions. Note that these windows are not dispatch
51092 windows whose sizes are DISPATCH_WINDOW_SIZE. */
51094 static dispatch_windows *
51095 allocate_next_window (int window_num)
51097 if (window_num == 0)
51099 if (dispatch_window_list->next)
51100 init_window (1);
51101 init_window (0);
51102 return dispatch_window_list;
51105 dispatch_window_list->next = dispatch_window_list1;
51106 dispatch_window_list1->prev = dispatch_window_list;
51108 return dispatch_window_list1;
51111 /* Compute number of immediate operands of an instruction. */
51113 static void
51114 find_constant (rtx in_rtx, imm_info *imm_values)
51116 if (INSN_P (in_rtx))
51117 in_rtx = PATTERN (in_rtx);
51118 subrtx_iterator::array_type array;
51119 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
51120 if (const_rtx x = *iter)
51121 switch (GET_CODE (x))
51123 case CONST:
51124 case SYMBOL_REF:
51125 case CONST_INT:
51126 (imm_values->imm)++;
51127 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
51128 (imm_values->imm32)++;
51129 else
51130 (imm_values->imm64)++;
51131 break;
51133 case CONST_DOUBLE:
51134 case CONST_WIDE_INT:
51135 (imm_values->imm)++;
51136 (imm_values->imm64)++;
51137 break;
51139 case CODE_LABEL:
51140 if (LABEL_KIND (x) == LABEL_NORMAL)
51142 (imm_values->imm)++;
51143 (imm_values->imm32)++;
51145 break;
51147 default:
51148 break;
51152 /* Return total size of immediate operands of an instruction along with number
51153 of corresponding immediate operands. It initializes its parameters to zero
51154 before calling FIND_CONSTANT.
51155 INSN is the input instruction. IMM is the total of immediates.
51156 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
51157 bit immediates. */
51159 static int
51160 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
51162 imm_info imm_values = {0, 0, 0};
51164 find_constant (insn, &imm_values);
51165 *imm = imm_values.imm;
51166 *imm32 = imm_values.imm32;
51167 *imm64 = imm_values.imm64;
51168 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
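/* Worked example (illustration only): for an insn containing one 32-bit and
   one 64-bit immediate, the counts come back as *IMM = 2, *IMM32 = 1 and
   *IMM64 = 1, and the returned size is 1 * 4 + 1 * 8 = 12 bytes.  */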
51171 /* This function indicates if an operand of an instruction is an
51172 immediate. */
51174 static bool
51175 has_immediate (rtx_insn *insn)
51177 int num_imm_operand;
51178 int num_imm32_operand;
51179 int num_imm64_operand;
51181 if (insn)
51182 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51183 &num_imm64_operand);
51184 return false;
51187 /* Return single or double path for instructions. */
51189 static enum insn_path
51190 get_insn_path (rtx_insn *insn)
51192 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
51194 if ((int)path == 0)
51195 return path_single;
51197 if ((int)path == 1)
51198 return path_double;
51200 return path_multi;
51203 /* Return insn dispatch group. */
51205 static enum dispatch_group
51206 get_insn_group (rtx_insn *insn)
51208 enum dispatch_group group = get_mem_group (insn);
51209 if (group)
51210 return group;
51212 if (is_branch (insn))
51213 return disp_branch;
51215 if (is_cmp (insn))
51216 return disp_cmp;
51218 if (has_immediate (insn))
51219 return disp_imm;
51221 if (is_prefetch (insn))
51222 return disp_prefetch;
51224 return disp_no_group;
51227 /* Count number of GROUP restricted instructions in a dispatch
51228 window WINDOW_LIST. */
51230 static int
51231 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
51233 enum dispatch_group group = get_insn_group (insn);
51234 int imm_size;
51235 int num_imm_operand;
51236 int num_imm32_operand;
51237 int num_imm64_operand;
51239 if (group == disp_no_group)
51240 return 0;
51242 if (group == disp_imm)
51244 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51245 &num_imm64_operand);
51246 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
51247 || num_imm_operand + window_list->num_imm > MAX_IMM
51248 || (num_imm32_operand > 0
51249 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
51250 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
51251 || (num_imm64_operand > 0
51252 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
51253 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
51254 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
51255 && num_imm64_operand > 0
51256 && ((window_list->num_imm_64 > 0
51257 && window_list->num_insn >= 2)
51258 || window_list->num_insn >= 3)))
51259 return BIG;
51261 return 1;
51264 if ((group == disp_load_store
51265 && (window_list->num_loads >= MAX_LOAD
51266 || window_list->num_stores >= MAX_STORE))
51267 || ((group == disp_load
51268 || group == disp_prefetch)
51269 && window_list->num_loads >= MAX_LOAD)
51270 || (group == disp_store
51271 && window_list->num_stores >= MAX_STORE))
51272 return BIG;
51274 return 1;
51277 /* This function returns true if insn satisfies dispatch rules on the
51278 last window scheduled. */
51280 static bool
51281 fits_dispatch_window (rtx_insn *insn)
51283 dispatch_windows *window_list = dispatch_window_list;
51284 dispatch_windows *window_list_next = dispatch_window_list->next;
51285 unsigned int num_restrict;
51286 enum dispatch_group group = get_insn_group (insn);
51287 enum insn_path path = get_insn_path (insn);
51288 int sum;
51290 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
51291 instructions should be given the lowest priority in the
51292 scheduling process in Haifa scheduler to make sure they will be
51293 scheduled in the same dispatch window as the reference to them. */
51294 if (group == disp_jcc || group == disp_cmp)
51295 return false;
51297 /* Check nonrestricted. */
51298 if (group == disp_no_group || group == disp_branch)
51299 return true;
51301 /* Get last dispatch window. */
51302 if (window_list_next)
51303 window_list = window_list_next;
51305 if (window_list->window_num == 1)
51307 sum = window_list->prev->window_size + window_list->window_size;
51309 if (sum == 32
51310 || (min_insn_size (insn) + sum) >= 48)
51311 /* Window 1 is full. Go for next window. */
51312 return true;
51315 num_restrict = count_num_restricted (insn, window_list);
51317 if (num_restrict > num_allowable_groups[group])
51318 return false;
51320 /* See if it fits in the first window. */
51321 if (window_list->window_num == 0)
51323 /* The first window should have only single and double path
51324 uops. */
51325 if (path == path_double
51326 && (window_list->num_uops + 2) > MAX_INSN)
51327 return false;
51328 else if (path != path_single)
51329 return false;
51331 return true;
51334 /* Add an instruction INSN with NUM_UOPS micro-operations to the
51335 dispatch window WINDOW_LIST. */
51337 static void
51338 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
51340 int byte_len = min_insn_size (insn);
51341 int num_insn = window_list->num_insn;
51342 int imm_size;
51343 sched_insn_info *window = window_list->window;
51344 enum dispatch_group group = get_insn_group (insn);
51345 enum insn_path path = get_insn_path (insn);
51346 int num_imm_operand;
51347 int num_imm32_operand;
51348 int num_imm64_operand;
51350 if (!window_list->violation && group != disp_cmp
51351 && !fits_dispatch_window (insn))
51352 window_list->violation = true;
51354 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51355 &num_imm64_operand);
51357 /* Initialize window with new instruction. */
51358 window[num_insn].insn = insn;
51359 window[num_insn].byte_len = byte_len;
51360 window[num_insn].group = group;
51361 window[num_insn].path = path;
51362 window[num_insn].imm_bytes = imm_size;
51364 window_list->window_size += byte_len;
51365 window_list->num_insn = num_insn + 1;
51366 window_list->num_uops = window_list->num_uops + num_uops;
51367 window_list->imm_size += imm_size;
51368 window_list->num_imm += num_imm_operand;
51369 window_list->num_imm_32 += num_imm32_operand;
51370 window_list->num_imm_64 += num_imm64_operand;
51372 if (group == disp_store)
51373 window_list->num_stores += 1;
51374 else if (group == disp_load
51375 || group == disp_prefetch)
51376 window_list->num_loads += 1;
51377 else if (group == disp_load_store)
51379 window_list->num_stores += 1;
51380 window_list->num_loads += 1;
51384 /* Adds a scheduled instruction, INSN, to the current dispatch window.
51385 If the total bytes of instructions or the number of instructions in
51386 the window exceed the allowable limit, it allocates a new window. */
51388 static void
51389 add_to_dispatch_window (rtx_insn *insn)
51391 int byte_len;
51392 dispatch_windows *window_list;
51393 dispatch_windows *next_list;
51394 dispatch_windows *window0_list;
51395 enum insn_path path;
51396 enum dispatch_group insn_group;
51397 bool insn_fits;
51398 int num_insn;
51399 int num_uops;
51400 int window_num;
51401 int insn_num_uops;
51402 int sum;
51404 if (INSN_CODE (insn) < 0)
51405 return;
51407 byte_len = min_insn_size (insn);
51408 window_list = dispatch_window_list;
51409 next_list = window_list->next;
51410 path = get_insn_path (insn);
51411 insn_group = get_insn_group (insn);
51413 /* Get the last dispatch window. */
51414 if (next_list)
51415 window_list = dispatch_window_list->next;
51417 if (path == path_single)
51418 insn_num_uops = 1;
51419 else if (path == path_double)
51420 insn_num_uops = 2;
51421 else
51422 insn_num_uops = (int) path;
51424 /* If current window is full, get a new window.
51425 Window number zero is full if MAX_INSN uops are scheduled in it.
51426 Window number one is full if window zero's bytes plus window
51427 one's bytes total 32, or if adding the bytes of the new instruction
51428 makes the total greater than 48, or if it already has MAX_INSN
51429 instructions in it.
51430 num_insn = window_list->num_insn;
51431 num_uops = window_list->num_uops;
51432 window_num = window_list->window_num;
51433 insn_fits = fits_dispatch_window (insn);
51435 if (num_insn >= MAX_INSN
51436 || num_uops + insn_num_uops > MAX_INSN
51437 || !(insn_fits))
51439 window_num = ~window_num & 1;
51440 window_list = allocate_next_window (window_num);
51443 if (window_num == 0)
51445 add_insn_window (insn, window_list, insn_num_uops);
51446 if (window_list->num_insn >= MAX_INSN
51447 && insn_group == disp_branch)
51449 process_end_window ();
51450 return;
51453 else if (window_num == 1)
51455 window0_list = window_list->prev;
51456 sum = window0_list->window_size + window_list->window_size;
51457 if (sum == 32
51458 || (byte_len + sum) >= 48)
51460 process_end_window ();
51461 window_list = dispatch_window_list;
51464 add_insn_window (insn, window_list, insn_num_uops);
51466 else
51467 gcc_unreachable ();
51469 if (is_end_basic_block (insn_group))
51471 /* End of basic block is reached; do end-basic-block processing. */
51472 process_end_window ();
51473 return;
51477 /* Print the dispatch window, WINDOW_NUM, to FILE. */
51479 DEBUG_FUNCTION static void
51480 debug_dispatch_window_file (FILE *file, int window_num)
51482 dispatch_windows *list;
51483 int i;
51485 if (window_num == 0)
51486 list = dispatch_window_list;
51487 else
51488 list = dispatch_window_list1;
51490 fprintf (file, "Window #%d:\n", list->window_num);
51491 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51492 list->num_insn, list->num_uops, list->window_size);
51493 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51494 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51496 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51497 list->num_stores);
51498 fprintf (file, " insn info:\n");
51500 for (i = 0; i < MAX_INSN; i++)
51502 if (!list->window[i].insn)
51503 break;
51504 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51505 i, group_name[list->window[i].group],
51506 i, (void *)list->window[i].insn,
51507 i, list->window[i].path,
51508 i, list->window[i].byte_len,
51509 i, list->window[i].imm_bytes);
51513 /* Print to stdout a dispatch window. */
51515 DEBUG_FUNCTION void
51516 debug_dispatch_window (int window_num)
51518 debug_dispatch_window_file (stdout, window_num);
51521 /* Print INSN dispatch information to FILE. */
51523 DEBUG_FUNCTION static void
51524 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51526 int byte_len;
51527 enum insn_path path;
51528 enum dispatch_group group;
51529 int imm_size;
51530 int num_imm_operand;
51531 int num_imm32_operand;
51532 int num_imm64_operand;
51534 if (INSN_CODE (insn) < 0)
51535 return;
51537 byte_len = min_insn_size (insn);
51538 path = get_insn_path (insn);
51539 group = get_insn_group (insn);
51540 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51541 &num_imm64_operand);
51543 fprintf (file, " insn info:\n");
51544 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51545 group_name[group], path, byte_len);
51546 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51547 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51550 /* Print to stdout the status of the ready list with respect to
51551 dispatch windows. */
51553 DEBUG_FUNCTION void
51554 debug_ready_dispatch (void)
51556 int i;
51557 int no_ready = number_in_ready ();
51559 fprintf (stdout, "Number of ready: %d\n", no_ready);
51561 for (i = 0; i < no_ready; i++)
51562 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51565 /* This routine is the driver of the dispatch scheduler. */
51567 static void
51568 do_dispatch (rtx_insn *insn, int mode)
51570 if (mode == DISPATCH_INIT)
51571 init_dispatch_sched ();
51572 else if (mode == ADD_TO_DISPATCH_WINDOW)
51573 add_to_dispatch_window (insn);
51576 /* Return TRUE if Dispatch Scheduling is supported. */
51578 static bool
51579 has_dispatch (rtx_insn *insn, int action)
51581 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51582 && flag_dispatch_scheduler)
51583 switch (action)
51585 default:
51586 return false;
51588 case IS_DISPATCH_ON:
51589 return true;
51590 break;
51592 case IS_CMP:
51593 return is_cmp (insn);
51595 case DISPATCH_VIOLATION:
51596 return dispatch_violation ();
51598 case FITS_DISPATCH_WINDOW:
51599 return fits_dispatch_window (insn);
51602 return false;
51605 /* Implementation of reassociation_width target hook used by
51606 reassoc phase to identify parallelism level in reassociated
51607 tree. The statement's tree_code is passed in OPC. The argument type
51608 is passed in MODE.
51610 Currently parallel reassociation is enabled for Atom
51611 processors only and we set reassociation width to be 2
51612 because Atom may issue up to 2 instructions per cycle.
51614 Return value should be fixed if parallel reassociation is
51615 enabled for other processors. */
51617 static int
51618 ix86_reassociation_width (unsigned int, machine_mode mode)
51620 /* Vector part. */
51621 if (VECTOR_MODE_P (mode))
51623 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51624 return 2;
51625 else
51626 return 1;
51629 /* Scalar part. */
51630 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51631 return 2;
51632 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51633 return 2;
51634 else
51635 return 1;
51638 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51639 place emms and femms instructions. */
51641 static machine_mode
51642 ix86_preferred_simd_mode (machine_mode mode)
51644 if (!TARGET_SSE)
51645 return word_mode;
51647 switch (mode)
51649 case QImode:
51650 return TARGET_AVX512BW ? V64QImode :
51651 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51652 case HImode:
51653 return TARGET_AVX512BW ? V32HImode :
51654 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51655 case SImode:
51656 return TARGET_AVX512F ? V16SImode :
51657 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51658 case DImode:
51659 return TARGET_AVX512F ? V8DImode :
51660 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51662 case SFmode:
51663 if (TARGET_AVX512F)
51664 return V16SFmode;
51665 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51666 return V8SFmode;
51667 else
51668 return V4SFmode;
51670 case DFmode:
51671 if (!TARGET_VECTORIZE_DOUBLE)
51672 return word_mode;
51673 else if (TARGET_AVX512F)
51674 return V8DFmode;
51675 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51676 return V4DFmode;
51677 else if (TARGET_SSE2)
51678 return V2DFmode;
51679 /* FALLTHRU */
51681 default:
51682 return word_mode;
51686 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51687 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51688 256bit and 128bit vectors. */
51690 static unsigned int
51691 ix86_autovectorize_vector_sizes (void)
51693 return TARGET_AVX512F ? 64 | 32 | 16 :
51694 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51699 /* Return class of registers which could be used for pseudo of MODE
51700 and of class RCLASS for spilling instead of memory. Return NO_REGS
51701 if it is not possible or not profitable. */
51702 static reg_class_t
51703 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51705 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51706 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51707 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51708 return ALL_SSE_REGS;
51709 return NO_REGS;
51712 /* Implement targetm.vectorize.init_cost. */
51714 static void *
51715 ix86_init_cost (struct loop *)
51717 unsigned *cost = XNEWVEC (unsigned, 3);
51718 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51719 return cost;
51722 /* Implement targetm.vectorize.add_stmt_cost. */
51724 static unsigned
51725 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51726 struct _stmt_vec_info *stmt_info, int misalign,
51727 enum vect_cost_model_location where)
51729 unsigned *cost = (unsigned *) data;
51730 unsigned retval = 0;
51732 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51733 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51735 /* Statements in an inner loop relative to the loop being
51736 vectorized are weighted more heavily. The value here is
51737 arbitrary and could potentially be improved with analysis. */
51738 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51739 count *= 50; /* FIXME. */
51741 retval = (unsigned) (count * stmt_cost);
51743 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
51744 for Silvermont, as it has an out-of-order integer pipeline and can execute
51745 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51746 if (TARGET_SILVERMONT || TARGET_INTEL)
51747 if (stmt_info && stmt_info->stmt)
51749 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51750 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51751 retval = (retval * 17) / 10;
51754 cost[where] += retval;
51756 return retval;
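/* Worked example (illustration only, values assumed): with COUNT = 2 and a
   per-statement cost of 3, RETVAL starts as 6; on Silvermont/Intel targets
   with an integer-typed lhs it becomes (6 * 17) / 10 = 10, i.e. roughly the
   1.7x scaling described above, and that amount is accumulated into the cost
   bucket selected by WHERE.  */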
51759 /* Implement targetm.vectorize.finish_cost. */
51761 static void
51762 ix86_finish_cost (void *data, unsigned *prologue_cost,
51763 unsigned *body_cost, unsigned *epilogue_cost)
51765 unsigned *cost = (unsigned *) data;
51766 *prologue_cost = cost[vect_prologue];
51767 *body_cost = cost[vect_body];
51768 *epilogue_cost = cost[vect_epilogue];
51771 /* Implement targetm.vectorize.destroy_cost_data. */
51773 static void
51774 ix86_destroy_cost_data (void *data)
51776 free (data);
51779 /* Validate target specific memory model bits in VAL. */
51781 static unsigned HOST_WIDE_INT
51782 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51784 enum memmodel model = memmodel_from_int (val);
51785 bool strong;
51787 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51788 |MEMMODEL_MASK)
51789 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51791 warning (OPT_Winvalid_memory_model,
51792 "Unknown architecture specific memory model");
51793 return MEMMODEL_SEQ_CST;
51795 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
51796 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
51798 warning (OPT_Winvalid_memory_model,
51799 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51800 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51802 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
51804 warning (OPT_Winvalid_memory_model,
51805 "HLE_RELEASE not used with RELEASE or stronger memory model");
51806 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51808 return val;
51811 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51812 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51813 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51814 or number of vecsize_mangle variants that should be emitted. */
51816 static int
51817 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51818 struct cgraph_simd_clone *clonei,
51819 tree base_type, int num)
51821 int ret = 1;
51823 if (clonei->simdlen
51824 && (clonei->simdlen < 2
51825 || clonei->simdlen > 16
51826 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51828 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51829 "unsupported simdlen %d", clonei->simdlen);
51830 return 0;
51833 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51834 if (TREE_CODE (ret_type) != VOID_TYPE)
51835 switch (TYPE_MODE (ret_type))
51837 case QImode:
51838 case HImode:
51839 case SImode:
51840 case DImode:
51841 case SFmode:
51842 case DFmode:
51843 /* case SCmode: */
51844 /* case DCmode: */
51845 break;
51846 default:
51847 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51848 "unsupported return type %qT for simd\n", ret_type);
51849 return 0;
51852 tree t;
51853 int i;
51855 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51856 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51857 switch (TYPE_MODE (TREE_TYPE (t)))
51859 case QImode:
51860 case HImode:
51861 case SImode:
51862 case DImode:
51863 case SFmode:
51864 case DFmode:
51865 /* case SCmode: */
51866 /* case DCmode: */
51867 break;
51868 default:
51869 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51870 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51871 return 0;
51874 if (clonei->cilk_elemental)
51876 /* Parse the processor clause here. If not present, default to 'b'. */
51877 clonei->vecsize_mangle = 'b';
51879 else if (!TREE_PUBLIC (node->decl))
51881 /* If the function isn't exported, we can pick up just one ISA
51882 for the clones. */
51883 if (TARGET_AVX2)
51884 clonei->vecsize_mangle = 'd';
51885 else if (TARGET_AVX)
51886 clonei->vecsize_mangle = 'c';
51887 else
51888 clonei->vecsize_mangle = 'b';
51889 ret = 1;
51891 else
51893 clonei->vecsize_mangle = "bcd"[num];
51894 ret = 3;
51896 switch (clonei->vecsize_mangle)
51898 case 'b':
51899 clonei->vecsize_int = 128;
51900 clonei->vecsize_float = 128;
51901 break;
51902 case 'c':
51903 clonei->vecsize_int = 128;
51904 clonei->vecsize_float = 256;
51905 break;
51906 case 'd':
51907 clonei->vecsize_int = 256;
51908 clonei->vecsize_float = 256;
51909 break;
51911 if (clonei->simdlen == 0)
51913 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51914 clonei->simdlen = clonei->vecsize_int;
51915 else
51916 clonei->simdlen = clonei->vecsize_float;
51917 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51918 if (clonei->simdlen > 16)
51919 clonei->simdlen = 16;
51921 return ret;
51924 /* Add target attribute to SIMD clone NODE if needed. */
51926 static void
51927 ix86_simd_clone_adjust (struct cgraph_node *node)
51929 const char *str = NULL;
51930 gcc_assert (node->decl == cfun->decl);
51931 switch (node->simdclone->vecsize_mangle)
51933 case 'b':
51934 if (!TARGET_SSE2)
51935 str = "sse2";
51936 break;
51937 case 'c':
51938 if (!TARGET_AVX)
51939 str = "avx";
51940 break;
51941 case 'd':
51942 if (!TARGET_AVX2)
51943 str = "avx2";
51944 break;
51945 default:
51946 gcc_unreachable ();
51948 if (str == NULL)
51949 return;
51950 push_cfun (NULL);
51951 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51952 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51953 gcc_assert (ok);
51954 pop_cfun ();
51955 ix86_reset_previous_fndecl ();
51956 ix86_set_current_function (node->decl);
51959 /* If SIMD clone NODE can't be used in a vectorized loop
51960 in current function, return -1, otherwise return a badness of using it
51961 (0 if it is most desirable from vecsize_mangle point of view, 1
51962 slightly less desirable, etc.). */
51964 static int
51965 ix86_simd_clone_usable (struct cgraph_node *node)
51967 switch (node->simdclone->vecsize_mangle)
51969 case 'b':
51970 if (!TARGET_SSE2)
51971 return -1;
51972 if (!TARGET_AVX)
51973 return 0;
51974 return TARGET_AVX2 ? 2 : 1;
51975 case 'c':
51976 if (!TARGET_AVX)
51977 return -1;
51978 return TARGET_AVX2 ? 1 : 0;
51979 break;
51980 case 'd':
51981 if (!TARGET_AVX2)
51982 return -1;
51983 return 0;
51984 default:
51985 gcc_unreachable ();
51989 /* This function adjusts the unroll factor based on
51990 the hardware capabilities. For example, bdver3 has
51991 a loop buffer which makes unrolling of smaller
51992 loops less important. This function decides the
51993 unroll factor using the number of memory references
51994 (the value 32 is used) as a heuristic.
51996 static unsigned
51997 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51999 basic_block *bbs;
52000 rtx_insn *insn;
52001 unsigned i;
52002 unsigned mem_count = 0;
52004 if (!TARGET_ADJUST_UNROLL)
52005 return nunroll;
52007 /* Count the number of memory references within the loop body.
52008 This value determines the unrolling factor for bdver3 and bdver4
52009 architectures. */
52010 subrtx_iterator::array_type array;
52011 bbs = get_loop_body (loop);
52012 for (i = 0; i < loop->num_nodes; i++)
52013 FOR_BB_INSNS (bbs[i], insn)
52014 if (NONDEBUG_INSN_P (insn))
52015 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
52016 if (const_rtx x = *iter)
52017 if (MEM_P (x))
52019 machine_mode mode = GET_MODE (x);
52020 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
52021 if (n_words > 4)
52022 mem_count += 2;
52023 else
52024 mem_count += 1;
52026 free (bbs);
52028 if (mem_count && mem_count <= 32)
52029 return 32 / mem_count;
52031 return nunroll;
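/* Illustrative sketch only (not compiler code): a plain-C restatement of the
   heuristic above.  For example, a loop body with 8 counted memory
   references yields an unroll factor of 32 / 8 = 4.  The helper name is an
   assumption made for this example.  */
static unsigned
model_unroll_adjust (unsigned nunroll, unsigned mem_count)
{
  if (mem_count && mem_count <= 32)
    return 32 / mem_count;
  return nunroll;
}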
52035 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
52037 static bool
52038 ix86_float_exceptions_rounding_supported_p (void)
52040 /* For x87 floating point with standard excess precision handling,
52041 there is no adddf3 pattern (since x87 floating point only has
52042 XFmode operations) so the default hook implementation gets this
52043 wrong. */
52044 return TARGET_80387 || TARGET_SSE_MATH;
52047 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
52049 static void
52050 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
52052 if (!TARGET_80387 && !TARGET_SSE_MATH)
52053 return;
52054 tree exceptions_var = create_tmp_var (integer_type_node);
52055 if (TARGET_80387)
52057 tree fenv_index_type = build_index_type (size_int (6));
52058 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
52059 tree fenv_var = create_tmp_var (fenv_type);
52060 mark_addressable (fenv_var);
52061 tree fenv_ptr = build_pointer_type (fenv_type);
52062 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
52063 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
52064 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
52065 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
52066 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
52067 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
52068 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
52069 tree hold_fnclex = build_call_expr (fnclex, 0);
52070 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
52071 hold_fnclex);
52072 *clear = build_call_expr (fnclex, 0);
52073 tree sw_var = create_tmp_var (short_unsigned_type_node);
52074 tree fnstsw_call = build_call_expr (fnstsw, 0);
52075 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
52076 sw_var, fnstsw_call);
52077 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
52078 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
52079 exceptions_var, exceptions_x87);
52080 *update = build2 (COMPOUND_EXPR, integer_type_node,
52081 sw_mod, update_mod);
52082 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
52083 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
52085 if (TARGET_SSE_MATH)
52087 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
52088 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
52089 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
52090 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
52091 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
52092 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
52093 mxcsr_orig_var, stmxcsr_hold_call);
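/* In MXCSR, bits 0-5 are the exception status flags and bits 7-12 are
   the exception mask bits, so OR-ing in 0x1f80 masks all SSE exceptions
   and AND-ing with 0xffffffc0 clears any pending status flags in the
   value that will be loaded for the "hold" state.  */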
52094 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
52095 mxcsr_orig_var,
52096 build_int_cst (unsigned_type_node, 0x1f80));
52097 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
52098 build_int_cst (unsigned_type_node, 0xffffffc0));
52099 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
52100 mxcsr_mod_var, hold_mod_val);
52101 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52102 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
52103 hold_assign_orig, hold_assign_mod);
52104 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
52105 ldmxcsr_hold_call);
52106 if (*hold)
52107 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
52108 else
52109 *hold = hold_all;
52110 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52111 if (*clear)
52112 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
52113 ldmxcsr_clear_call);
52114 else
52115 *clear = ldmxcsr_clear_call;
52116 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
52117 tree exceptions_sse = fold_convert (integer_type_node,
52118 stmxcsr_update_call);
52119 if (*update)
52120 {
52121 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
52122 exceptions_var, exceptions_sse);
52123 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
52124 exceptions_var, exceptions_mod);
52125 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
52126 exceptions_assign);
52127 }
52128 else
52129 *update = build2 (MODIFY_EXPR, integer_type_node,
52130 exceptions_var, exceptions_sse);
52131 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
52132 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52133 ldmxcsr_update_call);
52134 }
52135 tree atomic_feraiseexcept
52136 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
52137 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
52138 1, exceptions_var);
52139 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52140 atomic_feraiseexcept_call);
52141 }
52143 /* Return the mode to be used for bounds, or VOIDmode
52144 if bounds are not supported. */
52146 static enum machine_mode
52147 ix86_mpx_bound_mode ()
52148 {
52149 /* Do not support pointer checker if MPX
52150 is not enabled. */
52151 if (!TARGET_MPX)
52152 {
52153 if (flag_check_pointer_bounds)
52154 warning (0, "Pointer Checker requires MPX support on this target."
52155 " Use the -mmpx option to enable MPX.");
52156 return VOIDmode;
52157 }
52159 return BNDmode;
52160 }
52162 /* Return the constant used to statically initialize constant bounds.
52164 This function is used to create special bound values. For now
52165 only INIT bounds and NONE bounds are expected. More special
52166 values may be added later. */
52168 static tree
52169 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
52170 {
52171 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
52172 : build_zero_cst (pointer_sized_int_node);
52173 tree high = ub ? build_zero_cst (pointer_sized_int_node)
52174 : build_minus_one_cst (pointer_sized_int_node);
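/* The upper bound is kept in complemented form (ix86_initialize_bounds
   below makes this explicit with BIT_NOT_EXPR), so the two supported
   inputs (lb = 0, ub = -1) and (lb = -1, ub = 0) are encoded as the
   pairs (0, 0) and (-1, -1) respectively.  */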
52176 /* This function is supposed to be used to create INIT and
52177 NONE bounds only. */
52178 gcc_assert ((lb == 0 && ub == -1)
52179 || (lb == -1 && ub == 0));
52181 return build_complex (NULL, low, high);
52182 }
52184 /* Generate a list of statements STMTS to initialize pointer bounds
52185 variable VAR with bounds LB and UB. Return the number of generated
52186 statements. */
52188 static int
52189 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
52190 {
52191 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
52192 tree lhs, modify, var_p;
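/* The two pointer-sized halves of VAR are written directly: the first
   word receives LB and the second word receives ~UB, matching the
   complemented upper-bound encoding used for bound constants.  */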
52194 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
52195 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
52197 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
52198 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
52199 append_to_statement_list (modify, stmts);
52201 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
52202 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
52203 TYPE_SIZE_UNIT (pointer_sized_int_node)));
52204 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
52205 append_to_statement_list (modify, stmts);
52207 return 2;
52208 }
52210 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
52211 /* For i386, a common symbol is local only for non-PIE binaries. For
52212 x86-64, a common symbol is local only for non-PIE binaries, or when the
52213 linker supports copy relocations in PIE binaries. */
52215 static bool
52216 ix86_binds_local_p (const_tree exp)
52217 {
52218 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
52219 (!flag_pic
52220 || (TARGET_64BIT
52221 && HAVE_LD_PIE_COPYRELOC != 0)));
52222 }
52223 #endif
52225 /* If MEM is in the form of [base+offset], extract the two parts
52226 of the address into BASE and OFFSET; otherwise return false. */
52228 static bool
52229 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
52230 {
52231 rtx addr;
52233 gcc_assert (MEM_P (mem));
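/* For example, (mem (reg R)) or (mem (symbol_ref S)) yields that base
   with offset 0, and (mem (plus (reg R) (const_int 8))) yields base R
   with offset 8; any other address form is rejected.  */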
52235 addr = XEXP (mem, 0);
52237 if (GET_CODE (addr) == CONST)
52238 addr = XEXP (addr, 0);
52240 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
52241 {
52242 *base = addr;
52243 *offset = const0_rtx;
52244 return true;
52245 }
52247 if (GET_CODE (addr) == PLUS
52248 && (REG_P (XEXP (addr, 0))
52249 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
52250 && CONST_INT_P (XEXP (addr, 1)))
52251 {
52252 *base = XEXP (addr, 0);
52253 *offset = XEXP (addr, 1);
52254 return true;
52255 }
52257 return false;
52258 }
52260 /* Given OPERANDS of a pair of consecutive loads or stores, check if we
52261 can merge them into a move multiple. LOAD is true if they are load
52262 instructions. MODE is the mode of the memory operands. */
52264 bool
52265 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
52266 enum machine_mode mode)
52267 {
52268 HOST_WIDE_INT offval_1, offval_2, msize;
52269 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
52271 if (load)
52272 {
52273 mem_1 = operands[1];
52274 mem_2 = operands[3];
52275 reg_1 = operands[0];
52276 reg_2 = operands[2];
52277 }
52278 else
52279 {
52280 mem_1 = operands[0];
52281 mem_2 = operands[2];
52282 reg_1 = operands[1];
52283 reg_2 = operands[3];
52284 }
52286 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
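/* The pair can only be merged when both moves use the same register,
   the memory operands share a base, and mem_1 sits exactly one MODE
   width below mem_2.  */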
52288 if (REGNO (reg_1) != REGNO (reg_2))
52289 return false;
52291 /* Check if the addresses are in the form of [base+offset]. */
52292 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
52293 return false;
52294 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
52295 return false;
52297 /* Check if the bases are the same. */
52298 if (!rtx_equal_p (base_1, base_2))
52299 return false;
52301 offval_1 = INTVAL (offset_1);
52302 offval_2 = INTVAL (offset_2);
52303 msize = GET_MODE_SIZE (mode);
52304 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
52305 if (offval_1 + msize != offval_2)
52306 return false;
52309 return true;
52310 }
52311 /* Initialize the GCC target structure. */
52312 #undef TARGET_RETURN_IN_MEMORY
52313 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
52315 #undef TARGET_LEGITIMIZE_ADDRESS
52316 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
52318 #undef TARGET_ATTRIBUTE_TABLE
52319 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
52320 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
52321 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
52322 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52323 # undef TARGET_MERGE_DECL_ATTRIBUTES
52324 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
52325 #endif
52327 #undef TARGET_COMP_TYPE_ATTRIBUTES
52328 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
52330 #undef TARGET_INIT_BUILTINS
52331 #define TARGET_INIT_BUILTINS ix86_init_builtins
52332 #undef TARGET_BUILTIN_DECL
52333 #define TARGET_BUILTIN_DECL ix86_builtin_decl
52334 #undef TARGET_EXPAND_BUILTIN
52335 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
52337 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
52338 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
52339 ix86_builtin_vectorized_function
52341 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
52342 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
52344 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
52345 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
52347 #undef TARGET_VECTORIZE_BUILTIN_GATHER
52348 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
52350 #undef TARGET_BUILTIN_RECIPROCAL
52351 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
52353 #undef TARGET_ASM_FUNCTION_EPILOGUE
52354 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
52356 #undef TARGET_ENCODE_SECTION_INFO
52357 #ifndef SUBTARGET_ENCODE_SECTION_INFO
52358 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
52359 #else
52360 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
52361 #endif
52363 #undef TARGET_ASM_OPEN_PAREN
52364 #define TARGET_ASM_OPEN_PAREN ""
52365 #undef TARGET_ASM_CLOSE_PAREN
52366 #define TARGET_ASM_CLOSE_PAREN ""
52368 #undef TARGET_ASM_BYTE_OP
52369 #define TARGET_ASM_BYTE_OP ASM_BYTE
52371 #undef TARGET_ASM_ALIGNED_HI_OP
52372 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
52373 #undef TARGET_ASM_ALIGNED_SI_OP
52374 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
52375 #ifdef ASM_QUAD
52376 #undef TARGET_ASM_ALIGNED_DI_OP
52377 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
52378 #endif
52380 #undef TARGET_PROFILE_BEFORE_PROLOGUE
52381 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
52383 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
52384 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
52386 #undef TARGET_ASM_UNALIGNED_HI_OP
52387 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
52388 #undef TARGET_ASM_UNALIGNED_SI_OP
52389 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
52390 #undef TARGET_ASM_UNALIGNED_DI_OP
52391 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
52393 #undef TARGET_PRINT_OPERAND
52394 #define TARGET_PRINT_OPERAND ix86_print_operand
52395 #undef TARGET_PRINT_OPERAND_ADDRESS
52396 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
52397 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
52398 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
52399 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
52400 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
52402 #undef TARGET_SCHED_INIT_GLOBAL
52403 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
52404 #undef TARGET_SCHED_ADJUST_COST
52405 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
52406 #undef TARGET_SCHED_ISSUE_RATE
52407 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
52408 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
52409 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
52410 ia32_multipass_dfa_lookahead
52411 #undef TARGET_SCHED_MACRO_FUSION_P
52412 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
52413 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
52414 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
52416 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
52417 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
52419 #undef TARGET_MEMMODEL_CHECK
52420 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
52422 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
52423 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
52425 #ifdef HAVE_AS_TLS
52426 #undef TARGET_HAVE_TLS
52427 #define TARGET_HAVE_TLS true
52428 #endif
52429 #undef TARGET_CANNOT_FORCE_CONST_MEM
52430 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
52431 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
52432 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
52434 #undef TARGET_DELEGITIMIZE_ADDRESS
52435 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
52437 #undef TARGET_MS_BITFIELD_LAYOUT_P
52438 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
52440 #if TARGET_MACHO
52441 #undef TARGET_BINDS_LOCAL_P
52442 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
52443 #else
52444 #undef TARGET_BINDS_LOCAL_P
52445 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
52446 #endif
52447 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52448 #undef TARGET_BINDS_LOCAL_P
52449 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
52450 #endif
52452 #undef TARGET_ASM_OUTPUT_MI_THUNK
52453 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
52454 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
52455 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
52457 #undef TARGET_ASM_FILE_START
52458 #define TARGET_ASM_FILE_START x86_file_start
52460 #undef TARGET_OPTION_OVERRIDE
52461 #define TARGET_OPTION_OVERRIDE ix86_option_override
52463 #undef TARGET_REGISTER_MOVE_COST
52464 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
52465 #undef TARGET_MEMORY_MOVE_COST
52466 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
52467 #undef TARGET_RTX_COSTS
52468 #define TARGET_RTX_COSTS ix86_rtx_costs
52469 #undef TARGET_ADDRESS_COST
52470 #define TARGET_ADDRESS_COST ix86_address_cost
52472 #undef TARGET_FIXED_CONDITION_CODE_REGS
52473 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
52474 #undef TARGET_CC_MODES_COMPATIBLE
52475 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
52477 #undef TARGET_MACHINE_DEPENDENT_REORG
52478 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
52480 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
52481 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
52483 #undef TARGET_BUILD_BUILTIN_VA_LIST
52484 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
52486 #undef TARGET_FOLD_BUILTIN
52487 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
52489 #undef TARGET_COMPARE_VERSION_PRIORITY
52490 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
52492 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
52493 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
52494 ix86_generate_version_dispatcher_body
52496 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
52497 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
52498 ix86_get_function_versions_dispatcher
52500 #undef TARGET_ENUM_VA_LIST_P
52501 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
52503 #undef TARGET_FN_ABI_VA_LIST
52504 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
52506 #undef TARGET_CANONICAL_VA_LIST_TYPE
52507 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
52509 #undef TARGET_EXPAND_BUILTIN_VA_START
52510 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
52512 #undef TARGET_MD_ASM_ADJUST
52513 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
52515 #undef TARGET_PROMOTE_PROTOTYPES
52516 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
52517 #undef TARGET_SETUP_INCOMING_VARARGS
52518 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
52519 #undef TARGET_MUST_PASS_IN_STACK
52520 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
52521 #undef TARGET_FUNCTION_ARG_ADVANCE
52522 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
52523 #undef TARGET_FUNCTION_ARG
52524 #define TARGET_FUNCTION_ARG ix86_function_arg
52525 #undef TARGET_INIT_PIC_REG
52526 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
52527 #undef TARGET_USE_PSEUDO_PIC_REG
52528 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
52529 #undef TARGET_FUNCTION_ARG_BOUNDARY
52530 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
52531 #undef TARGET_PASS_BY_REFERENCE
52532 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
52533 #undef TARGET_INTERNAL_ARG_POINTER
52534 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
52535 #undef TARGET_UPDATE_STACK_BOUNDARY
52536 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
52537 #undef TARGET_GET_DRAP_RTX
52538 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
52539 #undef TARGET_STRICT_ARGUMENT_NAMING
52540 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
52541 #undef TARGET_STATIC_CHAIN
52542 #define TARGET_STATIC_CHAIN ix86_static_chain
52543 #undef TARGET_TRAMPOLINE_INIT
52544 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
52545 #undef TARGET_RETURN_POPS_ARGS
52546 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
52548 #undef TARGET_LEGITIMATE_COMBINED_INSN
52549 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
52551 #undef TARGET_ASAN_SHADOW_OFFSET
52552 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
52554 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
52555 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
52557 #undef TARGET_SCALAR_MODE_SUPPORTED_P
52558 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
52560 #undef TARGET_VECTOR_MODE_SUPPORTED_P
52561 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
52563 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
52564 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
52565 ix86_libgcc_floating_mode_supported_p
52567 #undef TARGET_C_MODE_FOR_SUFFIX
52568 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
52570 #ifdef HAVE_AS_TLS
52571 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
52572 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
52573 #endif
52575 #ifdef SUBTARGET_INSERT_ATTRIBUTES
52576 #undef TARGET_INSERT_ATTRIBUTES
52577 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
52578 #endif
52580 #undef TARGET_MANGLE_TYPE
52581 #define TARGET_MANGLE_TYPE ix86_mangle_type
52583 #if !TARGET_MACHO
52584 #undef TARGET_STACK_PROTECT_FAIL
52585 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
52586 #endif
52588 #undef TARGET_FUNCTION_VALUE
52589 #define TARGET_FUNCTION_VALUE ix86_function_value
52591 #undef TARGET_FUNCTION_VALUE_REGNO_P
52592 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
52594 #undef TARGET_PROMOTE_FUNCTION_MODE
52595 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
52597 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
52598 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
52600 #undef TARGET_MEMBER_TYPE_FORCES_BLK
52601 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
52603 #undef TARGET_INSTANTIATE_DECLS
52604 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
52606 #undef TARGET_SECONDARY_RELOAD
52607 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
52609 #undef TARGET_CLASS_MAX_NREGS
52610 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
52612 #undef TARGET_PREFERRED_RELOAD_CLASS
52613 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
52614 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
52615 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
52616 #undef TARGET_CLASS_LIKELY_SPILLED_P
52617 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
52619 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
52620 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
52621 ix86_builtin_vectorization_cost
52622 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
52623 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
52624 ix86_vectorize_vec_perm_const_ok
52625 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
52626 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
52627 ix86_preferred_simd_mode
52628 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
52629 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
52630 ix86_autovectorize_vector_sizes
52631 #undef TARGET_VECTORIZE_INIT_COST
52632 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
52633 #undef TARGET_VECTORIZE_ADD_STMT_COST
52634 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
52635 #undef TARGET_VECTORIZE_FINISH_COST
52636 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
52637 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
52638 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
52640 #undef TARGET_SET_CURRENT_FUNCTION
52641 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
52643 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
52644 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
52646 #undef TARGET_OPTION_SAVE
52647 #define TARGET_OPTION_SAVE ix86_function_specific_save
52649 #undef TARGET_OPTION_RESTORE
52650 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
52652 #undef TARGET_OPTION_POST_STREAM_IN
52653 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
52655 #undef TARGET_OPTION_PRINT
52656 #define TARGET_OPTION_PRINT ix86_function_specific_print
52658 #undef TARGET_OPTION_FUNCTION_VERSIONS
52659 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
52661 #undef TARGET_CAN_INLINE_P
52662 #define TARGET_CAN_INLINE_P ix86_can_inline_p
52664 #undef TARGET_EXPAND_TO_RTL_HOOK
52665 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
52667 #undef TARGET_LEGITIMATE_ADDRESS_P
52668 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
52670 #undef TARGET_LRA_P
52671 #define TARGET_LRA_P hook_bool_void_true
52673 #undef TARGET_REGISTER_PRIORITY
52674 #define TARGET_REGISTER_PRIORITY ix86_register_priority
52676 #undef TARGET_REGISTER_USAGE_LEVELING_P
52677 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
52679 #undef TARGET_LEGITIMATE_CONSTANT_P
52680 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52682 #undef TARGET_FRAME_POINTER_REQUIRED
52683 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52685 #undef TARGET_CAN_ELIMINATE
52686 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
52688 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52689 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52691 #undef TARGET_ASM_CODE_END
52692 #define TARGET_ASM_CODE_END ix86_code_end
52694 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52695 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
52697 #if TARGET_MACHO
52698 #undef TARGET_INIT_LIBFUNCS
52699 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52700 #endif
52702 #undef TARGET_LOOP_UNROLL_ADJUST
52703 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52705 #undef TARGET_SPILL_CLASS
52706 #define TARGET_SPILL_CLASS ix86_spill_class
52708 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52709 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52710 ix86_simd_clone_compute_vecsize_and_simdlen
52712 #undef TARGET_SIMD_CLONE_ADJUST
52713 #define TARGET_SIMD_CLONE_ADJUST \
52714 ix86_simd_clone_adjust
52716 #undef TARGET_SIMD_CLONE_USABLE
52717 #define TARGET_SIMD_CLONE_USABLE \
52718 ix86_simd_clone_usable
52720 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52721 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52722 ix86_float_exceptions_rounding_supported_p
52724 #undef TARGET_MODE_EMIT
52725 #define TARGET_MODE_EMIT ix86_emit_mode_set
52727 #undef TARGET_MODE_NEEDED
52728 #define TARGET_MODE_NEEDED ix86_mode_needed
52730 #undef TARGET_MODE_AFTER
52731 #define TARGET_MODE_AFTER ix86_mode_after
52733 #undef TARGET_MODE_ENTRY
52734 #define TARGET_MODE_ENTRY ix86_mode_entry
52736 #undef TARGET_MODE_EXIT
52737 #define TARGET_MODE_EXIT ix86_mode_exit
52739 #undef TARGET_MODE_PRIORITY
52740 #define TARGET_MODE_PRIORITY ix86_mode_priority
52742 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52743 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
52745 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52746 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52748 #undef TARGET_STORE_BOUNDS_FOR_ARG
52749 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52751 #undef TARGET_LOAD_RETURNED_BOUNDS
52752 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52754 #undef TARGET_STORE_RETURNED_BOUNDS
52755 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52757 #undef TARGET_CHKP_BOUND_MODE
52758 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52760 #undef TARGET_BUILTIN_CHKP_FUNCTION
52761 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52763 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52764 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52766 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52767 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52769 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52770 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52772 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52773 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52775 #undef TARGET_OFFLOAD_OPTIONS
52776 #define TARGET_OFFLOAD_OPTIONS \
52777 ix86_offload_options
52779 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52780 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
52782 struct gcc_target targetm = TARGET_INITIALIZER;
52784 #include "gt-i386.h"