1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "cfghooks.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "rtl.h"
28 #include "df.h"
29 #include "alias.h"
30 #include "fold-const.h"
31 #include "stringpool.h"
32 #include "attribs.h"
33 #include "calls.h"
34 #include "stor-layout.h"
35 #include "varasm.h"
36 #include "tm_p.h"
37 #include "regs.h"
38 #include "insn-config.h"
39 #include "conditions.h"
40 #include "output.h"
41 #include "insn-codes.h"
42 #include "insn-attr.h"
43 #include "flags.h"
44 #include "except.h"
45 #include "recog.h"
46 #include "expmed.h"
47 #include "dojump.h"
48 #include "explow.h"
49 #include "emit-rtl.h"
50 #include "stmt.h"
51 #include "expr.h"
52 #include "optabs.h"
53 #include "diagnostic-core.h"
54 #include "toplev.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "target.h"
61 #include "common/common-target.h"
62 #include "langhooks.h"
63 #include "reload.h"
64 #include "cgraph.h"
65 #include "internal-fn.h"
66 #include "gimple-fold.h"
67 #include "tree-eh.h"
68 #include "gimplify.h"
69 #include "cfgloop.h"
70 #include "dwarf2.h"
71 #include "tm-constrs.h"
72 #include "params.h"
73 #include "cselib.h"
74 #include "debug.h"
75 #include "sched-int.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84 #include "tree-vectorizer.h"
85 #include "shrink-wrap.h"
86 #include "builtins.h"
87 #include "rtl-iter.h"
88 #include "tree-iterator.h"
89 #include "tree-chkp.h"
90 #include "rtl-chkp.h"
92 /* This file should be included last. */
93 #include "target-def.h"
95 static rtx legitimize_dllimport_symbol (rtx, bool);
96 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
97 static rtx legitimize_pe_coff_symbol (rtx, bool);
99 #ifndef CHECK_STACK_LIMIT
100 #define CHECK_STACK_LIMIT (-1)
101 #endif
103 /* Return index of given mode in mult and division cost tables. */
104 #define MODE_INDEX(mode) \
105 ((mode) == QImode ? 0 \
106 : (mode) == HImode ? 1 \
107 : (mode) == SImode ? 2 \
108 : (mode) == DImode ? 3 \
109 : 4)
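 /* Illustrative note: MODE_INDEX is meant to index the per-mode multiply and
    divide cost arrays in the tables below, so the start-up cost of an SImode
    multiply under the active tuning would be looked up roughly as

      ix86_cost->mult_init[MODE_INDEX (SImode)]   (index 2)

    assuming the mult_init/divide field names declared for struct
    processor_costs in i386.h; index 4 ("other") covers any mode outside
    QI/HI/SI/DImode.  */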
111 /* Processor costs (relative to an add) */
112 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
113 #define COSTS_N_BYTES(N) ((N) * 2)
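 /* Worked example of the scaling assumed above: with COSTS_N_INSNS (N)
    expanding to (N) * 4, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so when
    tuning for size a two-byte instruction such as an add is charged the same
    as a single add is in the speed-oriented tables.  */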
115 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
117 static stringop_algs ix86_size_memcpy[2] = {
118 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
119 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
120 static stringop_algs ix86_size_memset[2] = {
121 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
122 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
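 /* Sketch of how these stringop tables are read (assuming the stringop_algs
    layout from i386.h and that element [0] is used for 32-bit and element [1]
    for 64-bit code): the leading algorithm applies when the block size is not
    known at compile time, and each {max, alg, noalign} triple selects ALG for
    known sizes up to MAX bytes, with max == -1 as the catch-all.  Thus
    ix86_size_memcpy and ix86_size_memset simply say: always emit rep movsb /
    rep stosb (rep_prefix_1_byte), whatever the size or target word width.  */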
124 const
125 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
126 COSTS_N_BYTES (2), /* cost of an add instruction */
127 COSTS_N_BYTES (3), /* cost of a lea instruction */
128 COSTS_N_BYTES (2), /* variable shift costs */
129 COSTS_N_BYTES (3), /* constant shift costs */
130 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
131 COSTS_N_BYTES (3), /* HI */
132 COSTS_N_BYTES (3), /* SI */
133 COSTS_N_BYTES (3), /* DI */
134 COSTS_N_BYTES (5)}, /* other */
135 0, /* cost of multiply per each bit set */
136 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
137 COSTS_N_BYTES (3), /* HI */
138 COSTS_N_BYTES (3), /* SI */
139 COSTS_N_BYTES (3), /* DI */
140 COSTS_N_BYTES (5)}, /* other */
141 COSTS_N_BYTES (3), /* cost of movsx */
142 COSTS_N_BYTES (3), /* cost of movzx */
143 0, /* "large" insn */
144 2, /* MOVE_RATIO */
145 2, /* cost for loading QImode using movzbl */
146 {2, 2, 2}, /* cost of loading integer registers
147 in QImode, HImode and SImode.
148 Relative to reg-reg move (2). */
149 {2, 2, 2}, /* cost of storing integer registers */
150 2, /* cost of reg,reg fld/fst */
151 {2, 2, 2}, /* cost of loading fp registers
152 in SFmode, DFmode and XFmode */
153 {2, 2, 2}, /* cost of storing fp registers
154 in SFmode, DFmode and XFmode */
155 3, /* cost of moving MMX register */
156 {3, 3}, /* cost of loading MMX registers
157 in SImode and DImode */
158 {3, 3}, /* cost of storing MMX registers
159 in SImode and DImode */
160 3, /* cost of moving SSE register */
161 {3, 3, 3}, /* cost of loading SSE registers
162 in SImode, DImode and TImode */
163 {3, 3, 3}, /* cost of storing SSE registers
164 in SImode, DImode and TImode */
165 3, /* MMX or SSE register to integer */
166 0, /* size of l1 cache */
167 0, /* size of l2 cache */
168 0, /* size of prefetch block */
169 0, /* number of parallel prefetches */
170 2, /* Branch cost */
171 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
172 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
173 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
174 COSTS_N_BYTES (2), /* cost of FABS instruction. */
175 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
176 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
177 ix86_size_memcpy,
178 ix86_size_memset,
179 1, /* scalar_stmt_cost. */
180 1, /* scalar load_cost. */
181 1, /* scalar_store_cost. */
182 1, /* vec_stmt_cost. */
183 1, /* vec_to_scalar_cost. */
184 1, /* scalar_to_vec_cost. */
185 1, /* vec_align_load_cost. */
186 1, /* vec_unalign_load_cost. */
187 1, /* vec_store_cost. */
188 1, /* cond_taken_branch_cost. */
 189   1,                                    /* cond_not_taken_branch_cost.  */
 190 };
192 /* Processor costs (relative to an add) */
193 static stringop_algs i386_memcpy[2] = {
194 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
195 DUMMY_STRINGOP_ALGS};
196 static stringop_algs i386_memset[2] = {
197 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
198 DUMMY_STRINGOP_ALGS};
200 static const
201 struct processor_costs i386_cost = { /* 386 specific costs */
202 COSTS_N_INSNS (1), /* cost of an add instruction */
203 COSTS_N_INSNS (1), /* cost of a lea instruction */
204 COSTS_N_INSNS (3), /* variable shift costs */
205 COSTS_N_INSNS (2), /* constant shift costs */
206 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
207 COSTS_N_INSNS (6), /* HI */
208 COSTS_N_INSNS (6), /* SI */
209 COSTS_N_INSNS (6), /* DI */
210 COSTS_N_INSNS (6)}, /* other */
211 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
212 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
213 COSTS_N_INSNS (23), /* HI */
214 COSTS_N_INSNS (23), /* SI */
215 COSTS_N_INSNS (23), /* DI */
216 COSTS_N_INSNS (23)}, /* other */
217 COSTS_N_INSNS (3), /* cost of movsx */
218 COSTS_N_INSNS (2), /* cost of movzx */
219 15, /* "large" insn */
220 3, /* MOVE_RATIO */
221 4, /* cost for loading QImode using movzbl */
222 {2, 4, 2}, /* cost of loading integer registers
223 in QImode, HImode and SImode.
224 Relative to reg-reg move (2). */
225 {2, 4, 2}, /* cost of storing integer registers */
226 2, /* cost of reg,reg fld/fst */
227 {8, 8, 8}, /* cost of loading fp registers
228 in SFmode, DFmode and XFmode */
229 {8, 8, 8}, /* cost of storing fp registers
230 in SFmode, DFmode and XFmode */
231 2, /* cost of moving MMX register */
232 {4, 8}, /* cost of loading MMX registers
233 in SImode and DImode */
234 {4, 8}, /* cost of storing MMX registers
235 in SImode and DImode */
236 2, /* cost of moving SSE register */
237 {4, 8, 16}, /* cost of loading SSE registers
238 in SImode, DImode and TImode */
239 {4, 8, 16}, /* cost of storing SSE registers
240 in SImode, DImode and TImode */
241 3, /* MMX or SSE register to integer */
242 0, /* size of l1 cache */
243 0, /* size of l2 cache */
244 0, /* size of prefetch block */
245 0, /* number of parallel prefetches */
246 1, /* Branch cost */
247 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
248 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
249 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
250 COSTS_N_INSNS (22), /* cost of FABS instruction. */
251 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
252 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
253 i386_memcpy,
254 i386_memset,
255 1, /* scalar_stmt_cost. */
256 1, /* scalar load_cost. */
257 1, /* scalar_store_cost. */
258 1, /* vec_stmt_cost. */
259 1, /* vec_to_scalar_cost. */
260 1, /* scalar_to_vec_cost. */
261 1, /* vec_align_load_cost. */
262 2, /* vec_unalign_load_cost. */
263 1, /* vec_store_cost. */
264 3, /* cond_taken_branch_cost. */
 265   1,                                    /* cond_not_taken_branch_cost.  */
 266 };
268 static stringop_algs i486_memcpy[2] = {
269 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
270 DUMMY_STRINGOP_ALGS};
271 static stringop_algs i486_memset[2] = {
272 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
273 DUMMY_STRINGOP_ALGS};
275 static const
276 struct processor_costs i486_cost = { /* 486 specific costs */
277 COSTS_N_INSNS (1), /* cost of an add instruction */
278 COSTS_N_INSNS (1), /* cost of a lea instruction */
279 COSTS_N_INSNS (3), /* variable shift costs */
280 COSTS_N_INSNS (2), /* constant shift costs */
281 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
282 COSTS_N_INSNS (12), /* HI */
283 COSTS_N_INSNS (12), /* SI */
284 COSTS_N_INSNS (12), /* DI */
285 COSTS_N_INSNS (12)}, /* other */
286 1, /* cost of multiply per each bit set */
287 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
288 COSTS_N_INSNS (40), /* HI */
289 COSTS_N_INSNS (40), /* SI */
290 COSTS_N_INSNS (40), /* DI */
291 COSTS_N_INSNS (40)}, /* other */
292 COSTS_N_INSNS (3), /* cost of movsx */
293 COSTS_N_INSNS (2), /* cost of movzx */
294 15, /* "large" insn */
295 3, /* MOVE_RATIO */
296 4, /* cost for loading QImode using movzbl */
297 {2, 4, 2}, /* cost of loading integer registers
298 in QImode, HImode and SImode.
299 Relative to reg-reg move (2). */
300 {2, 4, 2}, /* cost of storing integer registers */
301 2, /* cost of reg,reg fld/fst */
302 {8, 8, 8}, /* cost of loading fp registers
303 in SFmode, DFmode and XFmode */
304 {8, 8, 8}, /* cost of storing fp registers
305 in SFmode, DFmode and XFmode */
306 2, /* cost of moving MMX register */
307 {4, 8}, /* cost of loading MMX registers
308 in SImode and DImode */
309 {4, 8}, /* cost of storing MMX registers
310 in SImode and DImode */
311 2, /* cost of moving SSE register */
312 {4, 8, 16}, /* cost of loading SSE registers
313 in SImode, DImode and TImode */
314 {4, 8, 16}, /* cost of storing SSE registers
315 in SImode, DImode and TImode */
316 3, /* MMX or SSE register to integer */
317 4, /* size of l1 cache. 486 has 8kB cache
318 shared for code and data, so 4kB is
319 not really precise. */
320 4, /* size of l2 cache */
321 0, /* size of prefetch block */
322 0, /* number of parallel prefetches */
323 1, /* Branch cost */
324 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
325 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
326 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
327 COSTS_N_INSNS (3), /* cost of FABS instruction. */
328 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
329 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
330 i486_memcpy,
331 i486_memset,
332 1, /* scalar_stmt_cost. */
333 1, /* scalar load_cost. */
334 1, /* scalar_store_cost. */
335 1, /* vec_stmt_cost. */
336 1, /* vec_to_scalar_cost. */
337 1, /* scalar_to_vec_cost. */
338 1, /* vec_align_load_cost. */
339 2, /* vec_unalign_load_cost. */
340 1, /* vec_store_cost. */
341 3, /* cond_taken_branch_cost. */
 342   1,                                    /* cond_not_taken_branch_cost.  */
 343 };
345 static stringop_algs pentium_memcpy[2] = {
346 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
347 DUMMY_STRINGOP_ALGS};
348 static stringop_algs pentium_memset[2] = {
349 {libcall, {{-1, rep_prefix_4_byte, false}}},
350 DUMMY_STRINGOP_ALGS};
352 static const
353 struct processor_costs pentium_cost = {
354 COSTS_N_INSNS (1), /* cost of an add instruction */
355 COSTS_N_INSNS (1), /* cost of a lea instruction */
356 COSTS_N_INSNS (4), /* variable shift costs */
357 COSTS_N_INSNS (1), /* constant shift costs */
358 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
359 COSTS_N_INSNS (11), /* HI */
360 COSTS_N_INSNS (11), /* SI */
361 COSTS_N_INSNS (11), /* DI */
362 COSTS_N_INSNS (11)}, /* other */
363 0, /* cost of multiply per each bit set */
364 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
365 COSTS_N_INSNS (25), /* HI */
366 COSTS_N_INSNS (25), /* SI */
367 COSTS_N_INSNS (25), /* DI */
368 COSTS_N_INSNS (25)}, /* other */
369 COSTS_N_INSNS (3), /* cost of movsx */
370 COSTS_N_INSNS (2), /* cost of movzx */
371 8, /* "large" insn */
372 6, /* MOVE_RATIO */
373 6, /* cost for loading QImode using movzbl */
374 {2, 4, 2}, /* cost of loading integer registers
375 in QImode, HImode and SImode.
376 Relative to reg-reg move (2). */
377 {2, 4, 2}, /* cost of storing integer registers */
378 2, /* cost of reg,reg fld/fst */
379 {2, 2, 6}, /* cost of loading fp registers
380 in SFmode, DFmode and XFmode */
381 {4, 4, 6}, /* cost of storing fp registers
382 in SFmode, DFmode and XFmode */
383 8, /* cost of moving MMX register */
384 {8, 8}, /* cost of loading MMX registers
385 in SImode and DImode */
386 {8, 8}, /* cost of storing MMX registers
387 in SImode and DImode */
388 2, /* cost of moving SSE register */
389 {4, 8, 16}, /* cost of loading SSE registers
390 in SImode, DImode and TImode */
391 {4, 8, 16}, /* cost of storing SSE registers
392 in SImode, DImode and TImode */
393 3, /* MMX or SSE register to integer */
394 8, /* size of l1 cache. */
395 8, /* size of l2 cache */
396 0, /* size of prefetch block */
397 0, /* number of parallel prefetches */
398 2, /* Branch cost */
399 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
400 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
401 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
402 COSTS_N_INSNS (1), /* cost of FABS instruction. */
403 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
404 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
405 pentium_memcpy,
406 pentium_memset,
407 1, /* scalar_stmt_cost. */
408 1, /* scalar load_cost. */
409 1, /* scalar_store_cost. */
410 1, /* vec_stmt_cost. */
411 1, /* vec_to_scalar_cost. */
412 1, /* scalar_to_vec_cost. */
413 1, /* vec_align_load_cost. */
414 2, /* vec_unalign_load_cost. */
415 1, /* vec_store_cost. */
416 3, /* cond_taken_branch_cost. */
 417   1,                                    /* cond_not_taken_branch_cost.  */
 418 };
420 static const
421 struct processor_costs iamcu_cost = {
422 COSTS_N_INSNS (1), /* cost of an add instruction */
423 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
424 COSTS_N_INSNS (1), /* variable shift costs */
425 COSTS_N_INSNS (1), /* constant shift costs */
426 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
427 COSTS_N_INSNS (11), /* HI */
428 COSTS_N_INSNS (11), /* SI */
429 COSTS_N_INSNS (11), /* DI */
430 COSTS_N_INSNS (11)}, /* other */
431 0, /* cost of multiply per each bit set */
432 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
433 COSTS_N_INSNS (25), /* HI */
434 COSTS_N_INSNS (25), /* SI */
435 COSTS_N_INSNS (25), /* DI */
436 COSTS_N_INSNS (25)}, /* other */
437 COSTS_N_INSNS (3), /* cost of movsx */
438 COSTS_N_INSNS (2), /* cost of movzx */
439 8, /* "large" insn */
440 6, /* MOVE_RATIO */
441 6, /* cost for loading QImode using movzbl */
442 {2, 4, 2}, /* cost of loading integer registers
443 in QImode, HImode and SImode.
444 Relative to reg-reg move (2). */
445 {2, 4, 2}, /* cost of storing integer registers */
446 2, /* cost of reg,reg fld/fst */
447 {2, 2, 6}, /* cost of loading fp registers
448 in SFmode, DFmode and XFmode */
449 {4, 4, 6}, /* cost of storing fp registers
450 in SFmode, DFmode and XFmode */
451 8, /* cost of moving MMX register */
452 {8, 8}, /* cost of loading MMX registers
453 in SImode and DImode */
454 {8, 8}, /* cost of storing MMX registers
455 in SImode and DImode */
456 2, /* cost of moving SSE register */
457 {4, 8, 16}, /* cost of loading SSE registers
458 in SImode, DImode and TImode */
459 {4, 8, 16}, /* cost of storing SSE registers
460 in SImode, DImode and TImode */
461 3, /* MMX or SSE register to integer */
462 8, /* size of l1 cache. */
463 8, /* size of l2 cache */
464 0, /* size of prefetch block */
465 0, /* number of parallel prefetches */
466 2, /* Branch cost */
467 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
468 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
469 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
470 COSTS_N_INSNS (1), /* cost of FABS instruction. */
471 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
472 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
473 pentium_memcpy,
474 pentium_memset,
475 1, /* scalar_stmt_cost. */
476 1, /* scalar load_cost. */
477 1, /* scalar_store_cost. */
478 1, /* vec_stmt_cost. */
479 1, /* vec_to_scalar_cost. */
480 1, /* scalar_to_vec_cost. */
481 1, /* vec_align_load_cost. */
482 2, /* vec_unalign_load_cost. */
483 1, /* vec_store_cost. */
484 3, /* cond_taken_branch_cost. */
 485   1,                                    /* cond_not_taken_branch_cost.  */
 486 };
 488 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
 489    (we ensure the alignment).  For small blocks an inline loop is still a
 490    noticeable win; for bigger blocks either rep movsl or rep movsb is the
 491    way to go.  Rep movsb apparently has a more expensive startup time in the
 492    CPU, but after 4K the difference is down in the noise.  */
493 static stringop_algs pentiumpro_memcpy[2] = {
494 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
495 {8192, rep_prefix_4_byte, false},
496 {-1, rep_prefix_1_byte, false}}},
497 DUMMY_STRINGOP_ALGS};
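 /* Concrete reading of pentiumpro_memcpy above, matching the comment: copies
    of known size up to 128 bytes use an inline loop, up to 1024 bytes an
    unrolled loop, up to 8192 bytes rep movsl (rep_prefix_4_byte), and rep
    movsb beyond that; an unknown size falls back to rep movsl.  Only the
    32-bit entry is populated, the 64-bit slot being the dummy libcall entry,
    presumably because PentiumPro tuning never applies to 64-bit code.  */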
498 static stringop_algs pentiumpro_memset[2] = {
499 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
500 {8192, rep_prefix_4_byte, false},
501 {-1, libcall, false}}},
502 DUMMY_STRINGOP_ALGS};
503 static const
504 struct processor_costs pentiumpro_cost = {
505 COSTS_N_INSNS (1), /* cost of an add instruction */
506 COSTS_N_INSNS (1), /* cost of a lea instruction */
507 COSTS_N_INSNS (1), /* variable shift costs */
508 COSTS_N_INSNS (1), /* constant shift costs */
509 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
510 COSTS_N_INSNS (4), /* HI */
511 COSTS_N_INSNS (4), /* SI */
512 COSTS_N_INSNS (4), /* DI */
513 COSTS_N_INSNS (4)}, /* other */
514 0, /* cost of multiply per each bit set */
515 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
516 COSTS_N_INSNS (17), /* HI */
517 COSTS_N_INSNS (17), /* SI */
518 COSTS_N_INSNS (17), /* DI */
519 COSTS_N_INSNS (17)}, /* other */
520 COSTS_N_INSNS (1), /* cost of movsx */
521 COSTS_N_INSNS (1), /* cost of movzx */
522 8, /* "large" insn */
523 6, /* MOVE_RATIO */
524 2, /* cost for loading QImode using movzbl */
525 {4, 4, 4}, /* cost of loading integer registers
526 in QImode, HImode and SImode.
527 Relative to reg-reg move (2). */
528 {2, 2, 2}, /* cost of storing integer registers */
529 2, /* cost of reg,reg fld/fst */
530 {2, 2, 6}, /* cost of loading fp registers
531 in SFmode, DFmode and XFmode */
532 {4, 4, 6}, /* cost of storing fp registers
533 in SFmode, DFmode and XFmode */
534 2, /* cost of moving MMX register */
535 {2, 2}, /* cost of loading MMX registers
536 in SImode and DImode */
537 {2, 2}, /* cost of storing MMX registers
538 in SImode and DImode */
539 2, /* cost of moving SSE register */
540 {2, 2, 8}, /* cost of loading SSE registers
541 in SImode, DImode and TImode */
542 {2, 2, 8}, /* cost of storing SSE registers
543 in SImode, DImode and TImode */
544 3, /* MMX or SSE register to integer */
545 8, /* size of l1 cache. */
546 256, /* size of l2 cache */
547 32, /* size of prefetch block */
548 6, /* number of parallel prefetches */
549 2, /* Branch cost */
550 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
551 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
552 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
553 COSTS_N_INSNS (2), /* cost of FABS instruction. */
554 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
555 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
556 pentiumpro_memcpy,
557 pentiumpro_memset,
558 1, /* scalar_stmt_cost. */
559 1, /* scalar load_cost. */
560 1, /* scalar_store_cost. */
561 1, /* vec_stmt_cost. */
562 1, /* vec_to_scalar_cost. */
563 1, /* scalar_to_vec_cost. */
564 1, /* vec_align_load_cost. */
565 2, /* vec_unalign_load_cost. */
566 1, /* vec_store_cost. */
567 3, /* cond_taken_branch_cost. */
 568   1,                                    /* cond_not_taken_branch_cost.  */
 569 };
571 static stringop_algs geode_memcpy[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static stringop_algs geode_memset[2] = {
575 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
576 DUMMY_STRINGOP_ALGS};
577 static const
578 struct processor_costs geode_cost = {
579 COSTS_N_INSNS (1), /* cost of an add instruction */
580 COSTS_N_INSNS (1), /* cost of a lea instruction */
581 COSTS_N_INSNS (2), /* variable shift costs */
582 COSTS_N_INSNS (1), /* constant shift costs */
583 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
584 COSTS_N_INSNS (4), /* HI */
585 COSTS_N_INSNS (7), /* SI */
586 COSTS_N_INSNS (7), /* DI */
587 COSTS_N_INSNS (7)}, /* other */
588 0, /* cost of multiply per each bit set */
589 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
590 COSTS_N_INSNS (23), /* HI */
591 COSTS_N_INSNS (39), /* SI */
592 COSTS_N_INSNS (39), /* DI */
593 COSTS_N_INSNS (39)}, /* other */
594 COSTS_N_INSNS (1), /* cost of movsx */
595 COSTS_N_INSNS (1), /* cost of movzx */
596 8, /* "large" insn */
597 4, /* MOVE_RATIO */
598 1, /* cost for loading QImode using movzbl */
599 {1, 1, 1}, /* cost of loading integer registers
600 in QImode, HImode and SImode.
601 Relative to reg-reg move (2). */
602 {1, 1, 1}, /* cost of storing integer registers */
603 1, /* cost of reg,reg fld/fst */
604 {1, 1, 1}, /* cost of loading fp registers
605 in SFmode, DFmode and XFmode */
606 {4, 6, 6}, /* cost of storing fp registers
607 in SFmode, DFmode and XFmode */
609 1, /* cost of moving MMX register */
610 {1, 1}, /* cost of loading MMX registers
611 in SImode and DImode */
612 {1, 1}, /* cost of storing MMX registers
613 in SImode and DImode */
614 1, /* cost of moving SSE register */
615 {1, 1, 1}, /* cost of loading SSE registers
616 in SImode, DImode and TImode */
617 {1, 1, 1}, /* cost of storing SSE registers
618 in SImode, DImode and TImode */
619 1, /* MMX or SSE register to integer */
620 64, /* size of l1 cache. */
621 128, /* size of l2 cache. */
622 32, /* size of prefetch block */
623 1, /* number of parallel prefetches */
624 1, /* Branch cost */
625 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
626 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
627 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
628 COSTS_N_INSNS (1), /* cost of FABS instruction. */
629 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
630 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
631 geode_memcpy,
632 geode_memset,
633 1, /* scalar_stmt_cost. */
634 1, /* scalar load_cost. */
635 1, /* scalar_store_cost. */
636 1, /* vec_stmt_cost. */
637 1, /* vec_to_scalar_cost. */
638 1, /* scalar_to_vec_cost. */
639 1, /* vec_align_load_cost. */
640 2, /* vec_unalign_load_cost. */
641 1, /* vec_store_cost. */
642 3, /* cond_taken_branch_cost. */
 643   1,                                    /* cond_not_taken_branch_cost.  */
 644 };
646 static stringop_algs k6_memcpy[2] = {
647 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
648 DUMMY_STRINGOP_ALGS};
649 static stringop_algs k6_memset[2] = {
650 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
651 DUMMY_STRINGOP_ALGS};
652 static const
653 struct processor_costs k6_cost = {
654 COSTS_N_INSNS (1), /* cost of an add instruction */
655 COSTS_N_INSNS (2), /* cost of a lea instruction */
656 COSTS_N_INSNS (1), /* variable shift costs */
657 COSTS_N_INSNS (1), /* constant shift costs */
658 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
659 COSTS_N_INSNS (3), /* HI */
660 COSTS_N_INSNS (3), /* SI */
661 COSTS_N_INSNS (3), /* DI */
662 COSTS_N_INSNS (3)}, /* other */
663 0, /* cost of multiply per each bit set */
664 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
665 COSTS_N_INSNS (18), /* HI */
666 COSTS_N_INSNS (18), /* SI */
667 COSTS_N_INSNS (18), /* DI */
668 COSTS_N_INSNS (18)}, /* other */
669 COSTS_N_INSNS (2), /* cost of movsx */
670 COSTS_N_INSNS (2), /* cost of movzx */
671 8, /* "large" insn */
672 4, /* MOVE_RATIO */
673 3, /* cost for loading QImode using movzbl */
674 {4, 5, 4}, /* cost of loading integer registers
675 in QImode, HImode and SImode.
676 Relative to reg-reg move (2). */
677 {2, 3, 2}, /* cost of storing integer registers */
678 4, /* cost of reg,reg fld/fst */
679 {6, 6, 6}, /* cost of loading fp registers
680 in SFmode, DFmode and XFmode */
681 {4, 4, 4}, /* cost of storing fp registers
682 in SFmode, DFmode and XFmode */
683 2, /* cost of moving MMX register */
684 {2, 2}, /* cost of loading MMX registers
685 in SImode and DImode */
686 {2, 2}, /* cost of storing MMX registers
687 in SImode and DImode */
688 2, /* cost of moving SSE register */
689 {2, 2, 8}, /* cost of loading SSE registers
690 in SImode, DImode and TImode */
691 {2, 2, 8}, /* cost of storing SSE registers
692 in SImode, DImode and TImode */
693 6, /* MMX or SSE register to integer */
694 32, /* size of l1 cache. */
695 32, /* size of l2 cache. Some models
696 have integrated l2 cache, but
697 optimizing for k6 is not important
698 enough to worry about that. */
699 32, /* size of prefetch block */
700 1, /* number of parallel prefetches */
701 1, /* Branch cost */
702 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
703 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
704 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
705 COSTS_N_INSNS (2), /* cost of FABS instruction. */
706 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
707 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
708 k6_memcpy,
709 k6_memset,
710 1, /* scalar_stmt_cost. */
711 1, /* scalar load_cost. */
712 1, /* scalar_store_cost. */
713 1, /* vec_stmt_cost. */
714 1, /* vec_to_scalar_cost. */
715 1, /* scalar_to_vec_cost. */
716 1, /* vec_align_load_cost. */
717 2, /* vec_unalign_load_cost. */
718 1, /* vec_store_cost. */
719 3, /* cond_taken_branch_cost. */
 720   1,                                    /* cond_not_taken_branch_cost.  */
 721 };
 723 /* For some reason, Athlon deals better with the REP prefix (relative to
 724    loops) than K8 does.  Alignment becomes important after 8 bytes for
 725    memcpy and 128 bytes for memset.  */
726 static stringop_algs athlon_memcpy[2] = {
727 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
728 DUMMY_STRINGOP_ALGS};
729 static stringop_algs athlon_memset[2] = {
730 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
731 DUMMY_STRINGOP_ALGS};
732 static const
733 struct processor_costs athlon_cost = {
734 COSTS_N_INSNS (1), /* cost of an add instruction */
735 COSTS_N_INSNS (2), /* cost of a lea instruction */
736 COSTS_N_INSNS (1), /* variable shift costs */
737 COSTS_N_INSNS (1), /* constant shift costs */
738 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
739 COSTS_N_INSNS (5), /* HI */
740 COSTS_N_INSNS (5), /* SI */
741 COSTS_N_INSNS (5), /* DI */
742 COSTS_N_INSNS (5)}, /* other */
743 0, /* cost of multiply per each bit set */
744 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
745 COSTS_N_INSNS (26), /* HI */
746 COSTS_N_INSNS (42), /* SI */
747 COSTS_N_INSNS (74), /* DI */
748 COSTS_N_INSNS (74)}, /* other */
749 COSTS_N_INSNS (1), /* cost of movsx */
750 COSTS_N_INSNS (1), /* cost of movzx */
751 8, /* "large" insn */
752 9, /* MOVE_RATIO */
753 4, /* cost for loading QImode using movzbl */
754 {3, 4, 3}, /* cost of loading integer registers
755 in QImode, HImode and SImode.
756 Relative to reg-reg move (2). */
757 {3, 4, 3}, /* cost of storing integer registers */
758 4, /* cost of reg,reg fld/fst */
759 {4, 4, 12}, /* cost of loading fp registers
760 in SFmode, DFmode and XFmode */
761 {6, 6, 8}, /* cost of storing fp registers
762 in SFmode, DFmode and XFmode */
763 2, /* cost of moving MMX register */
764 {4, 4}, /* cost of loading MMX registers
765 in SImode and DImode */
766 {4, 4}, /* cost of storing MMX registers
767 in SImode and DImode */
768 2, /* cost of moving SSE register */
769 {4, 4, 6}, /* cost of loading SSE registers
770 in SImode, DImode and TImode */
771 {4, 4, 5}, /* cost of storing SSE registers
772 in SImode, DImode and TImode */
773 5, /* MMX or SSE register to integer */
774 64, /* size of l1 cache. */
775 256, /* size of l2 cache. */
776 64, /* size of prefetch block */
777 6, /* number of parallel prefetches */
778 5, /* Branch cost */
779 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
780 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
781 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
782 COSTS_N_INSNS (2), /* cost of FABS instruction. */
783 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
784 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
785 athlon_memcpy,
786 athlon_memset,
787 1, /* scalar_stmt_cost. */
788 1, /* scalar load_cost. */
789 1, /* scalar_store_cost. */
790 1, /* vec_stmt_cost. */
791 1, /* vec_to_scalar_cost. */
792 1, /* scalar_to_vec_cost. */
793 1, /* vec_align_load_cost. */
794 2, /* vec_unalign_load_cost. */
795 1, /* vec_store_cost. */
796 3, /* cond_taken_branch_cost. */
 797   1,                                    /* cond_not_taken_branch_cost.  */
 798 };
 800 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
 801    small blocks it is better to use a loop.  For large blocks, a libcall can
 802    do non-temporal accesses and beat inline code considerably.  */
803 static stringop_algs k8_memcpy[2] = {
804 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
805 {-1, rep_prefix_4_byte, false}}},
806 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
807 {-1, libcall, false}}}};
808 static stringop_algs k8_memset[2] = {
809 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
810 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
811 {libcall, {{48, unrolled_loop, false},
812 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
813 static const
814 struct processor_costs k8_cost = {
815 COSTS_N_INSNS (1), /* cost of an add instruction */
816 COSTS_N_INSNS (2), /* cost of a lea instruction */
817 COSTS_N_INSNS (1), /* variable shift costs */
818 COSTS_N_INSNS (1), /* constant shift costs */
819 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
820 COSTS_N_INSNS (4), /* HI */
821 COSTS_N_INSNS (3), /* SI */
822 COSTS_N_INSNS (4), /* DI */
823 COSTS_N_INSNS (5)}, /* other */
824 0, /* cost of multiply per each bit set */
825 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
826 COSTS_N_INSNS (26), /* HI */
827 COSTS_N_INSNS (42), /* SI */
828 COSTS_N_INSNS (74), /* DI */
829 COSTS_N_INSNS (74)}, /* other */
830 COSTS_N_INSNS (1), /* cost of movsx */
831 COSTS_N_INSNS (1), /* cost of movzx */
832 8, /* "large" insn */
833 9, /* MOVE_RATIO */
834 4, /* cost for loading QImode using movzbl */
835 {3, 4, 3}, /* cost of loading integer registers
836 in QImode, HImode and SImode.
837 Relative to reg-reg move (2). */
838 {3, 4, 3}, /* cost of storing integer registers */
839 4, /* cost of reg,reg fld/fst */
840 {4, 4, 12}, /* cost of loading fp registers
841 in SFmode, DFmode and XFmode */
842 {6, 6, 8}, /* cost of storing fp registers
843 in SFmode, DFmode and XFmode */
844 2, /* cost of moving MMX register */
845 {3, 3}, /* cost of loading MMX registers
846 in SImode and DImode */
847 {4, 4}, /* cost of storing MMX registers
848 in SImode and DImode */
849 2, /* cost of moving SSE register */
850 {4, 3, 6}, /* cost of loading SSE registers
851 in SImode, DImode and TImode */
852 {4, 4, 5}, /* cost of storing SSE registers
853 in SImode, DImode and TImode */
854 5, /* MMX or SSE register to integer */
855 64, /* size of l1 cache. */
856 512, /* size of l2 cache. */
857 64, /* size of prefetch block */
858 /* New AMD processors never drop prefetches; if they cannot be performed
859 immediately, they are queued. We set number of simultaneous prefetches
860 to a large constant to reflect this (it probably is not a good idea not
861 to limit number of prefetches at all, as their execution also takes some
862 time). */
863 100, /* number of parallel prefetches */
864 3, /* Branch cost */
865 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
866 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
867 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
868 COSTS_N_INSNS (2), /* cost of FABS instruction. */
869 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
870 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
872 k8_memcpy,
873 k8_memset,
874 4, /* scalar_stmt_cost. */
875 2, /* scalar load_cost. */
876 2, /* scalar_store_cost. */
877 5, /* vec_stmt_cost. */
878 0, /* vec_to_scalar_cost. */
879 2, /* scalar_to_vec_cost. */
880 2, /* vec_align_load_cost. */
881 3, /* vec_unalign_load_cost. */
882 3, /* vec_store_cost. */
883 3, /* cond_taken_branch_cost. */
 884   2,                                    /* cond_not_taken_branch_cost.  */
 885 };
 887 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 888    very small blocks it is better to use a loop.  For large blocks, a libcall
 889    can do non-temporal accesses and beat inline code considerably.  */
890 static stringop_algs amdfam10_memcpy[2] = {
891 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
892 {-1, rep_prefix_4_byte, false}}},
893 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
894 {-1, libcall, false}}}};
895 static stringop_algs amdfam10_memset[2] = {
896 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
897 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
898 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
899 {-1, libcall, false}}}};
900 struct processor_costs amdfam10_cost = {
901 COSTS_N_INSNS (1), /* cost of an add instruction */
902 COSTS_N_INSNS (2), /* cost of a lea instruction */
903 COSTS_N_INSNS (1), /* variable shift costs */
904 COSTS_N_INSNS (1), /* constant shift costs */
905 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
906 COSTS_N_INSNS (4), /* HI */
907 COSTS_N_INSNS (3), /* SI */
908 COSTS_N_INSNS (4), /* DI */
909 COSTS_N_INSNS (5)}, /* other */
910 0, /* cost of multiply per each bit set */
911 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
912 COSTS_N_INSNS (35), /* HI */
913 COSTS_N_INSNS (51), /* SI */
914 COSTS_N_INSNS (83), /* DI */
915 COSTS_N_INSNS (83)}, /* other */
916 COSTS_N_INSNS (1), /* cost of movsx */
917 COSTS_N_INSNS (1), /* cost of movzx */
918 8, /* "large" insn */
919 9, /* MOVE_RATIO */
920 4, /* cost for loading QImode using movzbl */
921 {3, 4, 3}, /* cost of loading integer registers
922 in QImode, HImode and SImode.
923 Relative to reg-reg move (2). */
924 {3, 4, 3}, /* cost of storing integer registers */
925 4, /* cost of reg,reg fld/fst */
926 {4, 4, 12}, /* cost of loading fp registers
927 in SFmode, DFmode and XFmode */
928 {6, 6, 8}, /* cost of storing fp registers
929 in SFmode, DFmode and XFmode */
930 2, /* cost of moving MMX register */
931 {3, 3}, /* cost of loading MMX registers
932 in SImode and DImode */
933 {4, 4}, /* cost of storing MMX registers
934 in SImode and DImode */
935 2, /* cost of moving SSE register */
936 {4, 4, 3}, /* cost of loading SSE registers
937 in SImode, DImode and TImode */
938 {4, 4, 5}, /* cost of storing SSE registers
939 in SImode, DImode and TImode */
940 3, /* MMX or SSE register to integer */
941 /* On K8:
942 MOVD reg64, xmmreg Double FSTORE 4
943 MOVD reg32, xmmreg Double FSTORE 4
944 On AMDFAM10:
945 MOVD reg64, xmmreg Double FADD 3
946 1/1 1/1
947 MOVD reg32, xmmreg Double FADD 3
948 1/1 1/1 */
949 64, /* size of l1 cache. */
950 512, /* size of l2 cache. */
951 64, /* size of prefetch block */
952 /* New AMD processors never drop prefetches; if they cannot be performed
953 immediately, they are queued. We set number of simultaneous prefetches
954 to a large constant to reflect this (it probably is not a good idea not
955 to limit number of prefetches at all, as their execution also takes some
956 time). */
957 100, /* number of parallel prefetches */
958 2, /* Branch cost */
959 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
960 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
961 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
962 COSTS_N_INSNS (2), /* cost of FABS instruction. */
963 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
964 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
966 amdfam10_memcpy,
967 amdfam10_memset,
968 4, /* scalar_stmt_cost. */
969 2, /* scalar load_cost. */
970 2, /* scalar_store_cost. */
971 6, /* vec_stmt_cost. */
972 0, /* vec_to_scalar_cost. */
973 2, /* scalar_to_vec_cost. */
974 2, /* vec_align_load_cost. */
975 2, /* vec_unalign_load_cost. */
976 2, /* vec_store_cost. */
977 2, /* cond_taken_branch_cost. */
 978   1,                                    /* cond_not_taken_branch_cost.  */
 979 };
 981 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
 982    very small blocks it is better to use a loop.  For large blocks, a libcall
 983    can do non-temporal accesses and beat inline code considerably.  */
984 static stringop_algs bdver1_memcpy[2] = {
985 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
986 {-1, rep_prefix_4_byte, false}}},
987 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
988 {-1, libcall, false}}}};
989 static stringop_algs bdver1_memset[2] = {
990 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
991 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
992 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
993 {-1, libcall, false}}}};
995 const struct processor_costs bdver1_cost = {
996 COSTS_N_INSNS (1), /* cost of an add instruction */
997 COSTS_N_INSNS (1), /* cost of a lea instruction */
998 COSTS_N_INSNS (1), /* variable shift costs */
999 COSTS_N_INSNS (1), /* constant shift costs */
1000 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1001 COSTS_N_INSNS (4), /* HI */
1002 COSTS_N_INSNS (4), /* SI */
1003 COSTS_N_INSNS (6), /* DI */
1004 COSTS_N_INSNS (6)}, /* other */
1005 0, /* cost of multiply per each bit set */
1006 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1007 COSTS_N_INSNS (35), /* HI */
1008 COSTS_N_INSNS (51), /* SI */
1009 COSTS_N_INSNS (83), /* DI */
1010 COSTS_N_INSNS (83)}, /* other */
1011 COSTS_N_INSNS (1), /* cost of movsx */
1012 COSTS_N_INSNS (1), /* cost of movzx */
1013 8, /* "large" insn */
1014 9, /* MOVE_RATIO */
1015 4, /* cost for loading QImode using movzbl */
1016 {5, 5, 4}, /* cost of loading integer registers
1017 in QImode, HImode and SImode.
1018 Relative to reg-reg move (2). */
1019 {4, 4, 4}, /* cost of storing integer registers */
1020 2, /* cost of reg,reg fld/fst */
1021 {5, 5, 12}, /* cost of loading fp registers
1022 in SFmode, DFmode and XFmode */
1023 {4, 4, 8}, /* cost of storing fp registers
1024 in SFmode, DFmode and XFmode */
1025 2, /* cost of moving MMX register */
1026 {4, 4}, /* cost of loading MMX registers
1027 in SImode and DImode */
1028 {4, 4}, /* cost of storing MMX registers
1029 in SImode and DImode */
1030 2, /* cost of moving SSE register */
1031 {4, 4, 4}, /* cost of loading SSE registers
1032 in SImode, DImode and TImode */
1033 {4, 4, 4}, /* cost of storing SSE registers
1034 in SImode, DImode and TImode */
1035 2, /* MMX or SSE register to integer */
1036 /* On K8:
1037 MOVD reg64, xmmreg Double FSTORE 4
1038 MOVD reg32, xmmreg Double FSTORE 4
1039 On AMDFAM10:
1040 MOVD reg64, xmmreg Double FADD 3
1041 1/1 1/1
1042 MOVD reg32, xmmreg Double FADD 3
1043 1/1 1/1 */
1044 16, /* size of l1 cache. */
1045 2048, /* size of l2 cache. */
1046 64, /* size of prefetch block */
1047 /* New AMD processors never drop prefetches; if they cannot be performed
1048 immediately, they are queued. We set number of simultaneous prefetches
1049 to a large constant to reflect this (it probably is not a good idea not
1050 to limit number of prefetches at all, as their execution also takes some
1051 time). */
1052 100, /* number of parallel prefetches */
1053 2, /* Branch cost */
1054 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1055 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1056 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1057 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1058 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1059 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1061 bdver1_memcpy,
1062 bdver1_memset,
1063 6, /* scalar_stmt_cost. */
1064 4, /* scalar load_cost. */
1065 4, /* scalar_store_cost. */
1066 6, /* vec_stmt_cost. */
1067 0, /* vec_to_scalar_cost. */
1068 2, /* scalar_to_vec_cost. */
1069 4, /* vec_align_load_cost. */
1070 4, /* vec_unalign_load_cost. */
1071 4, /* vec_store_cost. */
1072 4, /* cond_taken_branch_cost. */
 1073   2,                                   /* cond_not_taken_branch_cost.  */
 1074 };
 1076 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
 1077    very small blocks it is better to use a loop.  For large blocks, a libcall
 1078    can do non-temporal accesses and beat inline code considerably.  */
1080 static stringop_algs bdver2_memcpy[2] = {
1081 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1082 {-1, rep_prefix_4_byte, false}}},
1083 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1084 {-1, libcall, false}}}};
1085 static stringop_algs bdver2_memset[2] = {
1086 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1087 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1088 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1089 {-1, libcall, false}}}};
1091 const struct processor_costs bdver2_cost = {
1092 COSTS_N_INSNS (1), /* cost of an add instruction */
1093 COSTS_N_INSNS (1), /* cost of a lea instruction */
1094 COSTS_N_INSNS (1), /* variable shift costs */
1095 COSTS_N_INSNS (1), /* constant shift costs */
1096 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1097 COSTS_N_INSNS (4), /* HI */
1098 COSTS_N_INSNS (4), /* SI */
1099 COSTS_N_INSNS (6), /* DI */
1100 COSTS_N_INSNS (6)}, /* other */
1101 0, /* cost of multiply per each bit set */
1102 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1103 COSTS_N_INSNS (35), /* HI */
1104 COSTS_N_INSNS (51), /* SI */
1105 COSTS_N_INSNS (83), /* DI */
1106 COSTS_N_INSNS (83)}, /* other */
1107 COSTS_N_INSNS (1), /* cost of movsx */
1108 COSTS_N_INSNS (1), /* cost of movzx */
1109 8, /* "large" insn */
1110 9, /* MOVE_RATIO */
1111 4, /* cost for loading QImode using movzbl */
1112 {5, 5, 4}, /* cost of loading integer registers
1113 in QImode, HImode and SImode.
1114 Relative to reg-reg move (2). */
1115 {4, 4, 4}, /* cost of storing integer registers */
1116 2, /* cost of reg,reg fld/fst */
1117 {5, 5, 12}, /* cost of loading fp registers
1118 in SFmode, DFmode and XFmode */
1119 {4, 4, 8}, /* cost of storing fp registers
1120 in SFmode, DFmode and XFmode */
1121 2, /* cost of moving MMX register */
1122 {4, 4}, /* cost of loading MMX registers
1123 in SImode and DImode */
1124 {4, 4}, /* cost of storing MMX registers
1125 in SImode and DImode */
1126 2, /* cost of moving SSE register */
1127 {4, 4, 4}, /* cost of loading SSE registers
1128 in SImode, DImode and TImode */
1129 {4, 4, 4}, /* cost of storing SSE registers
1130 in SImode, DImode and TImode */
1131 2, /* MMX or SSE register to integer */
1132 /* On K8:
1133 MOVD reg64, xmmreg Double FSTORE 4
1134 MOVD reg32, xmmreg Double FSTORE 4
1135 On AMDFAM10:
1136 MOVD reg64, xmmreg Double FADD 3
1137 1/1 1/1
1138 MOVD reg32, xmmreg Double FADD 3
1139 1/1 1/1 */
1140 16, /* size of l1 cache. */
1141 2048, /* size of l2 cache. */
1142 64, /* size of prefetch block */
1143 /* New AMD processors never drop prefetches; if they cannot be performed
1144 immediately, they are queued. We set number of simultaneous prefetches
1145 to a large constant to reflect this (it probably is not a good idea not
1146 to limit number of prefetches at all, as their execution also takes some
1147 time). */
1148 100, /* number of parallel prefetches */
1149 2, /* Branch cost */
1150 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1151 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1152 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1153 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1154 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1155 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1157 bdver2_memcpy,
1158 bdver2_memset,
1159 6, /* scalar_stmt_cost. */
1160 4, /* scalar load_cost. */
1161 4, /* scalar_store_cost. */
1162 6, /* vec_stmt_cost. */
1163 0, /* vec_to_scalar_cost. */
1164 2, /* scalar_to_vec_cost. */
1165 4, /* vec_align_load_cost. */
1166 4, /* vec_unalign_load_cost. */
1167 4, /* vec_store_cost. */
1168 4, /* cond_taken_branch_cost. */
 1169   2,                                   /* cond_not_taken_branch_cost.  */
 1170 };
 1173 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
 1174    very small blocks it is better to use a loop.  For large blocks, a libcall
 1175    can do non-temporal accesses and beat inline code considerably.  */
1176 static stringop_algs bdver3_memcpy[2] = {
1177 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1178 {-1, rep_prefix_4_byte, false}}},
1179 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1180 {-1, libcall, false}}}};
1181 static stringop_algs bdver3_memset[2] = {
1182 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1183 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1184 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1185 {-1, libcall, false}}}};
1186 struct processor_costs bdver3_cost = {
1187 COSTS_N_INSNS (1), /* cost of an add instruction */
1188 COSTS_N_INSNS (1), /* cost of a lea instruction */
1189 COSTS_N_INSNS (1), /* variable shift costs */
1190 COSTS_N_INSNS (1), /* constant shift costs */
1191 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1192 COSTS_N_INSNS (4), /* HI */
1193 COSTS_N_INSNS (4), /* SI */
1194 COSTS_N_INSNS (6), /* DI */
1195 COSTS_N_INSNS (6)}, /* other */
1196 0, /* cost of multiply per each bit set */
1197 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1198 COSTS_N_INSNS (35), /* HI */
1199 COSTS_N_INSNS (51), /* SI */
1200 COSTS_N_INSNS (83), /* DI */
1201 COSTS_N_INSNS (83)}, /* other */
1202 COSTS_N_INSNS (1), /* cost of movsx */
1203 COSTS_N_INSNS (1), /* cost of movzx */
1204 8, /* "large" insn */
1205 9, /* MOVE_RATIO */
1206 4, /* cost for loading QImode using movzbl */
1207 {5, 5, 4}, /* cost of loading integer registers
1208 in QImode, HImode and SImode.
1209 Relative to reg-reg move (2). */
1210 {4, 4, 4}, /* cost of storing integer registers */
1211 2, /* cost of reg,reg fld/fst */
1212 {5, 5, 12}, /* cost of loading fp registers
1213 in SFmode, DFmode and XFmode */
1214 {4, 4, 8}, /* cost of storing fp registers
1215 in SFmode, DFmode and XFmode */
1216 2, /* cost of moving MMX register */
1217 {4, 4}, /* cost of loading MMX registers
1218 in SImode and DImode */
1219 {4, 4}, /* cost of storing MMX registers
1220 in SImode and DImode */
1221 2, /* cost of moving SSE register */
1222 {4, 4, 4}, /* cost of loading SSE registers
1223 in SImode, DImode and TImode */
1224 {4, 4, 4}, /* cost of storing SSE registers
1225 in SImode, DImode and TImode */
1226 2, /* MMX or SSE register to integer */
1227 16, /* size of l1 cache. */
1228 2048, /* size of l2 cache. */
1229 64, /* size of prefetch block */
1230 /* New AMD processors never drop prefetches; if they cannot be performed
1231 immediately, they are queued. We set number of simultaneous prefetches
1232 to a large constant to reflect this (it probably is not a good idea not
1233 to limit number of prefetches at all, as their execution also takes some
1234 time). */
1235 100, /* number of parallel prefetches */
1236 2, /* Branch cost */
1237 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1238 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1239 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1240 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1241 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1242 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1244 bdver3_memcpy,
1245 bdver3_memset,
1246 6, /* scalar_stmt_cost. */
1247 4, /* scalar load_cost. */
1248 4, /* scalar_store_cost. */
1249 6, /* vec_stmt_cost. */
1250 0, /* vec_to_scalar_cost. */
1251 2, /* scalar_to_vec_cost. */
1252 4, /* vec_align_load_cost. */
1253 4, /* vec_unalign_load_cost. */
1254 4, /* vec_store_cost. */
1255 4, /* cond_taken_branch_cost. */
 1256   2,                                   /* cond_not_taken_branch_cost.  */
 1257 };
 1259 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
 1260    very small blocks it is better to use a loop.  For large blocks, a libcall
 1261    can do non-temporal accesses and beat inline code considerably.  */
1262 static stringop_algs bdver4_memcpy[2] = {
1263 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1264 {-1, rep_prefix_4_byte, false}}},
1265 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1266 {-1, libcall, false}}}};
1267 static stringop_algs bdver4_memset[2] = {
1268 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1269 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1270 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1271 {-1, libcall, false}}}};
1272 struct processor_costs bdver4_cost = {
1273 COSTS_N_INSNS (1), /* cost of an add instruction */
1274 COSTS_N_INSNS (1), /* cost of a lea instruction */
1275 COSTS_N_INSNS (1), /* variable shift costs */
1276 COSTS_N_INSNS (1), /* constant shift costs */
1277 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1278 COSTS_N_INSNS (4), /* HI */
1279 COSTS_N_INSNS (4), /* SI */
1280 COSTS_N_INSNS (6), /* DI */
1281 COSTS_N_INSNS (6)}, /* other */
1282 0, /* cost of multiply per each bit set */
1283 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1284 COSTS_N_INSNS (35), /* HI */
1285 COSTS_N_INSNS (51), /* SI */
1286 COSTS_N_INSNS (83), /* DI */
1287 COSTS_N_INSNS (83)}, /* other */
1288 COSTS_N_INSNS (1), /* cost of movsx */
1289 COSTS_N_INSNS (1), /* cost of movzx */
1290 8, /* "large" insn */
1291 9, /* MOVE_RATIO */
1292 4, /* cost for loading QImode using movzbl */
1293 {5, 5, 4}, /* cost of loading integer registers
1294 in QImode, HImode and SImode.
1295 Relative to reg-reg move (2). */
1296 {4, 4, 4}, /* cost of storing integer registers */
1297 2, /* cost of reg,reg fld/fst */
1298 {5, 5, 12}, /* cost of loading fp registers
1299 in SFmode, DFmode and XFmode */
1300 {4, 4, 8}, /* cost of storing fp registers
1301 in SFmode, DFmode and XFmode */
1302 2, /* cost of moving MMX register */
1303 {4, 4}, /* cost of loading MMX registers
1304 in SImode and DImode */
1305 {4, 4}, /* cost of storing MMX registers
1306 in SImode and DImode */
1307 2, /* cost of moving SSE register */
1308 {4, 4, 4}, /* cost of loading SSE registers
1309 in SImode, DImode and TImode */
1310 {4, 4, 4}, /* cost of storing SSE registers
1311 in SImode, DImode and TImode */
1312 2, /* MMX or SSE register to integer */
1313 16, /* size of l1 cache. */
1314 2048, /* size of l2 cache. */
1315 64, /* size of prefetch block */
1316 /* New AMD processors never drop prefetches; if they cannot be performed
1317 immediately, they are queued. We set number of simultaneous prefetches
1318 to a large constant to reflect this (it probably is not a good idea not
1319 to limit number of prefetches at all, as their execution also takes some
1320 time). */
1321 100, /* number of parallel prefetches */
1322 2, /* Branch cost */
1323 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1324 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1325 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1326 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1327 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1328 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1330 bdver4_memcpy,
1331 bdver4_memset,
1332 6, /* scalar_stmt_cost. */
1333 4, /* scalar load_cost. */
1334 4, /* scalar_store_cost. */
1335 6, /* vec_stmt_cost. */
1336 0, /* vec_to_scalar_cost. */
1337 2, /* scalar_to_vec_cost. */
1338 4, /* vec_align_load_cost. */
1339 4, /* vec_unalign_load_cost. */
1340 4, /* vec_store_cost. */
1341 4, /* cond_taken_branch_cost. */
 1342   2,                                   /* cond_not_taken_branch_cost.  */
 1343 };
 1345 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
 1346    very small blocks it is better to use a loop.  For large blocks, a libcall
 1347    can do non-temporal accesses and beat inline code considerably.  */
1348 static stringop_algs btver1_memcpy[2] = {
1349 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1350 {-1, rep_prefix_4_byte, false}}},
1351 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1352 {-1, libcall, false}}}};
1353 static stringop_algs btver1_memset[2] = {
1354 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1355 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1356 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1357 {-1, libcall, false}}}};
1358 const struct processor_costs btver1_cost = {
1359 COSTS_N_INSNS (1), /* cost of an add instruction */
1360 COSTS_N_INSNS (2), /* cost of a lea instruction */
1361 COSTS_N_INSNS (1), /* variable shift costs */
1362 COSTS_N_INSNS (1), /* constant shift costs */
1363 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1364 COSTS_N_INSNS (4), /* HI */
1365 COSTS_N_INSNS (3), /* SI */
1366 COSTS_N_INSNS (4), /* DI */
1367 COSTS_N_INSNS (5)}, /* other */
1368 0, /* cost of multiply per each bit set */
1369 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1370 COSTS_N_INSNS (35), /* HI */
1371 COSTS_N_INSNS (51), /* SI */
1372 COSTS_N_INSNS (83), /* DI */
1373 COSTS_N_INSNS (83)}, /* other */
1374 COSTS_N_INSNS (1), /* cost of movsx */
1375 COSTS_N_INSNS (1), /* cost of movzx */
1376 8, /* "large" insn */
1377 9, /* MOVE_RATIO */
1378 4, /* cost for loading QImode using movzbl */
1379 {3, 4, 3}, /* cost of loading integer registers
1380 in QImode, HImode and SImode.
1381 Relative to reg-reg move (2). */
1382 {3, 4, 3}, /* cost of storing integer registers */
1383 4, /* cost of reg,reg fld/fst */
1384 {4, 4, 12}, /* cost of loading fp registers
1385 in SFmode, DFmode and XFmode */
1386 {6, 6, 8}, /* cost of storing fp registers
1387 in SFmode, DFmode and XFmode */
1388 2, /* cost of moving MMX register */
1389 {3, 3}, /* cost of loading MMX registers
1390 in SImode and DImode */
1391 {4, 4}, /* cost of storing MMX registers
1392 in SImode and DImode */
1393 2, /* cost of moving SSE register */
1394 {4, 4, 3}, /* cost of loading SSE registers
1395 in SImode, DImode and TImode */
1396 {4, 4, 5}, /* cost of storing SSE registers
1397 in SImode, DImode and TImode */
1398 3, /* MMX or SSE register to integer */
1399 /* On K8:
1400 MOVD reg64, xmmreg Double FSTORE 4
1401 MOVD reg32, xmmreg Double FSTORE 4
1402 On AMDFAM10:
1403 MOVD reg64, xmmreg Double FADD 3
1404 1/1 1/1
1405 MOVD reg32, xmmreg Double FADD 3
1406 1/1 1/1 */
1407 32, /* size of l1 cache. */
1408 512, /* size of l2 cache. */
1409 64, /* size of prefetch block */
1410 100, /* number of parallel prefetches */
1411 2, /* Branch cost */
1412 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1413 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1414 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1415 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1416 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1417 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1419 btver1_memcpy,
1420 btver1_memset,
1421 4, /* scalar_stmt_cost. */
1422 2, /* scalar load_cost. */
1423 2, /* scalar_store_cost. */
1424 6, /* vec_stmt_cost. */
1425 0, /* vec_to_scalar_cost. */
1426 2, /* scalar_to_vec_cost. */
1427 2, /* vec_align_load_cost. */
1428 2, /* vec_unalign_load_cost. */
1429 2, /* vec_store_cost. */
1430 2, /* cond_taken_branch_cost. */
 1431   1,                                   /* cond_not_taken_branch_cost.  */
 1432 };
1434 static stringop_algs btver2_memcpy[2] = {
1435 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1436 {-1, rep_prefix_4_byte, false}}},
1437 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1438 {-1, libcall, false}}}};
1439 static stringop_algs btver2_memset[2] = {
1440 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1441 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1442 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1443 {-1, libcall, false}}}};
1444 const struct processor_costs btver2_cost = {
1445 COSTS_N_INSNS (1), /* cost of an add instruction */
1446 COSTS_N_INSNS (2), /* cost of a lea instruction */
1447 COSTS_N_INSNS (1), /* variable shift costs */
1448 COSTS_N_INSNS (1), /* constant shift costs */
1449 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1450 COSTS_N_INSNS (4), /* HI */
1451 COSTS_N_INSNS (3), /* SI */
1452 COSTS_N_INSNS (4), /* DI */
1453 COSTS_N_INSNS (5)}, /* other */
1454 0, /* cost of multiply per each bit set */
1455 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1456 COSTS_N_INSNS (35), /* HI */
1457 COSTS_N_INSNS (51), /* SI */
1458 COSTS_N_INSNS (83), /* DI */
1459 COSTS_N_INSNS (83)}, /* other */
1460 COSTS_N_INSNS (1), /* cost of movsx */
1461 COSTS_N_INSNS (1), /* cost of movzx */
1462 8, /* "large" insn */
1463 9, /* MOVE_RATIO */
1464 4, /* cost for loading QImode using movzbl */
1465 {3, 4, 3}, /* cost of loading integer registers
1466 in QImode, HImode and SImode.
1467 Relative to reg-reg move (2). */
1468 {3, 4, 3}, /* cost of storing integer registers */
1469 4, /* cost of reg,reg fld/fst */
1470 {4, 4, 12}, /* cost of loading fp registers
1471 in SFmode, DFmode and XFmode */
1472 {6, 6, 8}, /* cost of storing fp registers
1473 in SFmode, DFmode and XFmode */
1474 2, /* cost of moving MMX register */
1475 {3, 3}, /* cost of loading MMX registers
1476 in SImode and DImode */
1477 {4, 4}, /* cost of storing MMX registers
1478 in SImode and DImode */
1479 2, /* cost of moving SSE register */
1480 {4, 4, 3}, /* cost of loading SSE registers
1481 in SImode, DImode and TImode */
1482 {4, 4, 5}, /* cost of storing SSE registers
1483 in SImode, DImode and TImode */
1484 3, /* MMX or SSE register to integer */
1485 /* On K8:
1486 MOVD reg64, xmmreg Double FSTORE 4
1487 MOVD reg32, xmmreg Double FSTORE 4
1488 On AMDFAM10:
1489 MOVD reg64, xmmreg Double FADD 3
1490 1/1 1/1
1491 MOVD reg32, xmmreg Double FADD 3
1492 1/1 1/1 */
1493 32, /* size of l1 cache. */
1494 2048, /* size of l2 cache. */
1495 64, /* size of prefetch block */
1496 100, /* number of parallel prefetches */
1497 2, /* Branch cost */
1498 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1499 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1500 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1501 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1502 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1503 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1504 btver2_memcpy,
1505 btver2_memset,
1506 4, /* scalar_stmt_cost. */
1507 2, /* scalar load_cost. */
1508 2, /* scalar_store_cost. */
1509 6, /* vec_stmt_cost. */
1510 0, /* vec_to_scalar_cost. */
1511 2, /* scalar_to_vec_cost. */
1512 2, /* vec_align_load_cost. */
1513 2, /* vec_unalign_load_cost. */
1514 2, /* vec_store_cost. */
1515 2, /* cond_taken_branch_cost. */
1516 1, /* cond_not_taken_branch_cost. */
1519 static stringop_algs pentium4_memcpy[2] = {
1520 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1521 DUMMY_STRINGOP_ALGS};
1522 static stringop_algs pentium4_memset[2] = {
1523 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1524 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1525 DUMMY_STRINGOP_ALGS};
1527 static const
1528 struct processor_costs pentium4_cost = {
1529 COSTS_N_INSNS (1), /* cost of an add instruction */
1530 COSTS_N_INSNS (3), /* cost of a lea instruction */
1531 COSTS_N_INSNS (4), /* variable shift costs */
1532 COSTS_N_INSNS (4), /* constant shift costs */
1533 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1534 COSTS_N_INSNS (15), /* HI */
1535 COSTS_N_INSNS (15), /* SI */
1536 COSTS_N_INSNS (15), /* DI */
1537 COSTS_N_INSNS (15)}, /* other */
1538 0, /* cost of multiply per each bit set */
1539 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1540 COSTS_N_INSNS (56), /* HI */
1541 COSTS_N_INSNS (56), /* SI */
1542 COSTS_N_INSNS (56), /* DI */
1543 COSTS_N_INSNS (56)}, /* other */
1544 COSTS_N_INSNS (1), /* cost of movsx */
1545 COSTS_N_INSNS (1), /* cost of movzx */
1546 16, /* "large" insn */
1547 6, /* MOVE_RATIO */
1548 2, /* cost for loading QImode using movzbl */
1549 {4, 5, 4}, /* cost of loading integer registers
1550 in QImode, HImode and SImode.
1551 Relative to reg-reg move (2). */
1552 {2, 3, 2}, /* cost of storing integer registers */
1553 2, /* cost of reg,reg fld/fst */
1554 {2, 2, 6}, /* cost of loading fp registers
1555 in SFmode, DFmode and XFmode */
1556 {4, 4, 6}, /* cost of storing fp registers
1557 in SFmode, DFmode and XFmode */
1558 2, /* cost of moving MMX register */
1559 {2, 2}, /* cost of loading MMX registers
1560 in SImode and DImode */
1561 {2, 2}, /* cost of storing MMX registers
1562 in SImode and DImode */
1563 12, /* cost of moving SSE register */
1564 {12, 12, 12}, /* cost of loading SSE registers
1565 in SImode, DImode and TImode */
1566 {2, 2, 8}, /* cost of storing SSE registers
1567 in SImode, DImode and TImode */
1568 10, /* MMX or SSE register to integer */
1569 8, /* size of l1 cache. */
1570 256, /* size of l2 cache. */
1571 64, /* size of prefetch block */
1572 6, /* number of parallel prefetches */
1573 2, /* Branch cost */
1574 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1575 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1576 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1577 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1578 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1579 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1580 pentium4_memcpy,
1581 pentium4_memset,
1582 1, /* scalar_stmt_cost. */
1583 1, /* scalar load_cost. */
1584 1, /* scalar_store_cost. */
1585 1, /* vec_stmt_cost. */
1586 1, /* vec_to_scalar_cost. */
1587 1, /* scalar_to_vec_cost. */
1588 1, /* vec_align_load_cost. */
1589 2, /* vec_unalign_load_cost. */
1590 1, /* vec_store_cost. */
1591 3, /* cond_taken_branch_cost. */
1592 1, /* cond_not_taken_branch_cost. */
1595 static stringop_algs nocona_memcpy[2] = {
1596 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1597 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1598 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1600 static stringop_algs nocona_memset[2] = {
1601 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1602 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1603 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1604 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1606 static const
1607 struct processor_costs nocona_cost = {
1608 COSTS_N_INSNS (1), /* cost of an add instruction */
1609 COSTS_N_INSNS (1), /* cost of a lea instruction */
1610 COSTS_N_INSNS (1), /* variable shift costs */
1611 COSTS_N_INSNS (1), /* constant shift costs */
1612 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1613 COSTS_N_INSNS (10), /* HI */
1614 COSTS_N_INSNS (10), /* SI */
1615 COSTS_N_INSNS (10), /* DI */
1616 COSTS_N_INSNS (10)}, /* other */
1617 0, /* cost of multiply per each bit set */
1618 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1619 COSTS_N_INSNS (66), /* HI */
1620 COSTS_N_INSNS (66), /* SI */
1621 COSTS_N_INSNS (66), /* DI */
1622 COSTS_N_INSNS (66)}, /* other */
1623 COSTS_N_INSNS (1), /* cost of movsx */
1624 COSTS_N_INSNS (1), /* cost of movzx */
1625 16, /* "large" insn */
1626 17, /* MOVE_RATIO */
1627 4, /* cost for loading QImode using movzbl */
1628 {4, 4, 4}, /* cost of loading integer registers
1629 in QImode, HImode and SImode.
1630 Relative to reg-reg move (2). */
1631 {4, 4, 4}, /* cost of storing integer registers */
1632 3, /* cost of reg,reg fld/fst */
1633 {12, 12, 12}, /* cost of loading fp registers
1634 in SFmode, DFmode and XFmode */
1635 {4, 4, 4}, /* cost of storing fp registers
1636 in SFmode, DFmode and XFmode */
1637 6, /* cost of moving MMX register */
1638 {12, 12}, /* cost of loading MMX registers
1639 in SImode and DImode */
1640 {12, 12}, /* cost of storing MMX registers
1641 in SImode and DImode */
1642 6, /* cost of moving SSE register */
1643 {12, 12, 12}, /* cost of loading SSE registers
1644 in SImode, DImode and TImode */
1645 {12, 12, 12}, /* cost of storing SSE registers
1646 in SImode, DImode and TImode */
1647 8, /* MMX or SSE register to integer */
1648 8, /* size of l1 cache. */
1649 1024, /* size of l2 cache. */
1650 64, /* size of prefetch block */
1651 8, /* number of parallel prefetches */
1652 1, /* Branch cost */
1653 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1654 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1655 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1656 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1657 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1658 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1659 nocona_memcpy,
1660 nocona_memset,
1661 1, /* scalar_stmt_cost. */
1662 1, /* scalar load_cost. */
1663 1, /* scalar_store_cost. */
1664 1, /* vec_stmt_cost. */
1665 1, /* vec_to_scalar_cost. */
1666 1, /* scalar_to_vec_cost. */
1667 1, /* vec_align_load_cost. */
1668 2, /* vec_unalign_load_cost. */
1669 1, /* vec_store_cost. */
1670 3, /* cond_taken_branch_cost. */
1671 1, /* cond_not_taken_branch_cost. */
1674 static stringop_algs atom_memcpy[2] = {
1675 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1676 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1677 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1678 static stringop_algs atom_memset[2] = {
1679 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1680 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1681 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1682 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1683 static const
1684 struct processor_costs atom_cost = {
1685 COSTS_N_INSNS (1), /* cost of an add instruction */
1686 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1687 COSTS_N_INSNS (1), /* variable shift costs */
1688 COSTS_N_INSNS (1), /* constant shift costs */
1689 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1690 COSTS_N_INSNS (4), /* HI */
1691 COSTS_N_INSNS (3), /* SI */
1692 COSTS_N_INSNS (4), /* DI */
1693 COSTS_N_INSNS (2)}, /* other */
1694 0, /* cost of multiply per each bit set */
1695 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1696 COSTS_N_INSNS (26), /* HI */
1697 COSTS_N_INSNS (42), /* SI */
1698 COSTS_N_INSNS (74), /* DI */
1699 COSTS_N_INSNS (74)}, /* other */
1700 COSTS_N_INSNS (1), /* cost of movsx */
1701 COSTS_N_INSNS (1), /* cost of movzx */
1702 8, /* "large" insn */
1703 17, /* MOVE_RATIO */
1704 4, /* cost for loading QImode using movzbl */
1705 {4, 4, 4}, /* cost of loading integer registers
1706 in QImode, HImode and SImode.
1707 Relative to reg-reg move (2). */
1708 {4, 4, 4}, /* cost of storing integer registers */
1709 4, /* cost of reg,reg fld/fst */
1710 {12, 12, 12}, /* cost of loading fp registers
1711 in SFmode, DFmode and XFmode */
1712 {6, 6, 8}, /* cost of storing fp registers
1713 in SFmode, DFmode and XFmode */
1714 2, /* cost of moving MMX register */
1715 {8, 8}, /* cost of loading MMX registers
1716 in SImode and DImode */
1717 {8, 8}, /* cost of storing MMX registers
1718 in SImode and DImode */
1719 2, /* cost of moving SSE register */
1720 {8, 8, 8}, /* cost of loading SSE registers
1721 in SImode, DImode and TImode */
1722 {8, 8, 8}, /* cost of storing SSE registers
1723 in SImode, DImode and TImode */
1724 5, /* MMX or SSE register to integer */
1725 32, /* size of l1 cache. */
1726 256, /* size of l2 cache. */
1727 64, /* size of prefetch block */
1728 6, /* number of parallel prefetches */
1729 3, /* Branch cost */
1730 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1731 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1732 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1733 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1734 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1735 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1736 atom_memcpy,
1737 atom_memset,
1738 1, /* scalar_stmt_cost. */
1739 1, /* scalar load_cost. */
1740 1, /* scalar_store_cost. */
1741 1, /* vec_stmt_cost. */
1742 1, /* vec_to_scalar_cost. */
1743 1, /* scalar_to_vec_cost. */
1744 1, /* vec_align_load_cost. */
1745 2, /* vec_unalign_load_cost. */
1746 1, /* vec_store_cost. */
1747 3, /* cond_taken_branch_cost. */
1748 1, /* cond_not_taken_branch_cost. */
1751 static stringop_algs slm_memcpy[2] = {
1752 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1753 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1754 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1755 static stringop_algs slm_memset[2] = {
1756 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1757 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1758 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1759 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1760 static const
1761 struct processor_costs slm_cost = {
1762 COSTS_N_INSNS (1), /* cost of an add instruction */
1763 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1764 COSTS_N_INSNS (1), /* variable shift costs */
1765 COSTS_N_INSNS (1), /* constant shift costs */
1766 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1767 COSTS_N_INSNS (3), /* HI */
1768 COSTS_N_INSNS (3), /* SI */
1769 COSTS_N_INSNS (4), /* DI */
1770 COSTS_N_INSNS (2)}, /* other */
1771 0, /* cost of multiply per each bit set */
1772 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1773 COSTS_N_INSNS (26), /* HI */
1774 COSTS_N_INSNS (42), /* SI */
1775 COSTS_N_INSNS (74), /* DI */
1776 COSTS_N_INSNS (74)}, /* other */
1777 COSTS_N_INSNS (1), /* cost of movsx */
1778 COSTS_N_INSNS (1), /* cost of movzx */
1779 8, /* "large" insn */
1780 17, /* MOVE_RATIO */
1781 4, /* cost for loading QImode using movzbl */
1782 {4, 4, 4}, /* cost of loading integer registers
1783 in QImode, HImode and SImode.
1784 Relative to reg-reg move (2). */
1785 {4, 4, 4}, /* cost of storing integer registers */
1786 4, /* cost of reg,reg fld/fst */
1787 {12, 12, 12}, /* cost of loading fp registers
1788 in SFmode, DFmode and XFmode */
1789 {6, 6, 8}, /* cost of storing fp registers
1790 in SFmode, DFmode and XFmode */
1791 2, /* cost of moving MMX register */
1792 {8, 8}, /* cost of loading MMX registers
1793 in SImode and DImode */
1794 {8, 8}, /* cost of storing MMX registers
1795 in SImode and DImode */
1796 2, /* cost of moving SSE register */
1797 {8, 8, 8}, /* cost of loading SSE registers
1798 in SImode, DImode and TImode */
1799 {8, 8, 8}, /* cost of storing SSE registers
1800 in SImode, DImode and TImode */
1801 5, /* MMX or SSE register to integer */
1802 32, /* size of l1 cache. */
1803 256, /* size of l2 cache. */
1804 64, /* size of prefetch block */
1805 6, /* number of parallel prefetches */
1806 3, /* Branch cost */
1807 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1808 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1809 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1810 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1811 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1812 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1813 slm_memcpy,
1814 slm_memset,
1815 1, /* scalar_stmt_cost. */
1816 1, /* scalar load_cost. */
1817 1, /* scalar_store_cost. */
1818 1, /* vec_stmt_cost. */
1819 4, /* vec_to_scalar_cost. */
1820 1, /* scalar_to_vec_cost. */
1821 1, /* vec_align_load_cost. */
1822 2, /* vec_unalign_load_cost. */
1823 1, /* vec_store_cost. */
1824 3, /* cond_taken_branch_cost. */
1825 1, /* cond_not_taken_branch_cost. */
1828 static stringop_algs intel_memcpy[2] = {
1829 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1830 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1831 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1832 static stringop_algs intel_memset[2] = {
1833 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1834 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1835 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1836 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1837 static const
1838 struct processor_costs intel_cost = {
1839 COSTS_N_INSNS (1), /* cost of an add instruction */
1840 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1841 COSTS_N_INSNS (1), /* variable shift costs */
1842 COSTS_N_INSNS (1), /* constant shift costs */
1843 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1844 COSTS_N_INSNS (3), /* HI */
1845 COSTS_N_INSNS (3), /* SI */
1846 COSTS_N_INSNS (4), /* DI */
1847 COSTS_N_INSNS (2)}, /* other */
1848 0, /* cost of multiply per each bit set */
1849 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1850 COSTS_N_INSNS (26), /* HI */
1851 COSTS_N_INSNS (42), /* SI */
1852 COSTS_N_INSNS (74), /* DI */
1853 COSTS_N_INSNS (74)}, /* other */
1854 COSTS_N_INSNS (1), /* cost of movsx */
1855 COSTS_N_INSNS (1), /* cost of movzx */
1856 8, /* "large" insn */
1857 17, /* MOVE_RATIO */
1858 4, /* cost for loading QImode using movzbl */
1859 {4, 4, 4}, /* cost of loading integer registers
1860 in QImode, HImode and SImode.
1861 Relative to reg-reg move (2). */
1862 {4, 4, 4}, /* cost of storing integer registers */
1863 4, /* cost of reg,reg fld/fst */
1864 {12, 12, 12}, /* cost of loading fp registers
1865 in SFmode, DFmode and XFmode */
1866 {6, 6, 8}, /* cost of storing fp registers
1867 in SFmode, DFmode and XFmode */
1868 2, /* cost of moving MMX register */
1869 {8, 8}, /* cost of loading MMX registers
1870 in SImode and DImode */
1871 {8, 8}, /* cost of storing MMX registers
1872 in SImode and DImode */
1873 2, /* cost of moving SSE register */
1874 {8, 8, 8}, /* cost of loading SSE registers
1875 in SImode, DImode and TImode */
1876 {8, 8, 8}, /* cost of storing SSE registers
1877 in SImode, DImode and TImode */
1878 5, /* MMX or SSE register to integer */
1879 32, /* size of l1 cache. */
1880 256, /* size of l2 cache. */
1881 64, /* size of prefetch block */
1882 6, /* number of parallel prefetches */
1883 3, /* Branch cost */
1884 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1885 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1886 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1887 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1888 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1889 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1890 intel_memcpy,
1891 intel_memset,
1892 1, /* scalar_stmt_cost. */
1893 1, /* scalar load_cost. */
1894 1, /* scalar_store_cost. */
1895 1, /* vec_stmt_cost. */
1896 4, /* vec_to_scalar_cost. */
1897 1, /* scalar_to_vec_cost. */
1898 1, /* vec_align_load_cost. */
1899 2, /* vec_unalign_load_cost. */
1900 1, /* vec_store_cost. */
1901 3, /* cond_taken_branch_cost. */
1902 1, /* cond_not_taken_branch_cost. */
1905 /* Generic should produce code tuned for Core-i7 (and newer chips)
1906 and btver1 (and newer chips). */
1908 static stringop_algs generic_memcpy[2] = {
1909 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1910 {-1, libcall, false}}},
1911 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1912 {-1, libcall, false}}}};
1913 static stringop_algs generic_memset[2] = {
1914 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1915 {-1, libcall, false}}},
1916 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1917 {-1, libcall, false}}}};
1918 static const
1919 struct processor_costs generic_cost = {
1920 COSTS_N_INSNS (1), /* cost of an add instruction */
1921 /* On all chips taken into consideration, lea is 2 cycles or more. With
1922 this cost, however, our current implementation of synth_mult results in
1923 the use of unnecessary temporary registers, causing regressions on several
1924 SPECfp benchmarks. */
1925 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1926 COSTS_N_INSNS (1), /* variable shift costs */
1927 COSTS_N_INSNS (1), /* constant shift costs */
1928 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1929 COSTS_N_INSNS (4), /* HI */
1930 COSTS_N_INSNS (3), /* SI */
1931 COSTS_N_INSNS (4), /* DI */
1932 COSTS_N_INSNS (2)}, /* other */
1933 0, /* cost of multiply per each bit set */
1934 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1935 COSTS_N_INSNS (26), /* HI */
1936 COSTS_N_INSNS (42), /* SI */
1937 COSTS_N_INSNS (74), /* DI */
1938 COSTS_N_INSNS (74)}, /* other */
1939 COSTS_N_INSNS (1), /* cost of movsx */
1940 COSTS_N_INSNS (1), /* cost of movzx */
1941 8, /* "large" insn */
1942 17, /* MOVE_RATIO */
1943 4, /* cost for loading QImode using movzbl */
1944 {4, 4, 4}, /* cost of loading integer registers
1945 in QImode, HImode and SImode.
1946 Relative to reg-reg move (2). */
1947 {4, 4, 4}, /* cost of storing integer registers */
1948 4, /* cost of reg,reg fld/fst */
1949 {12, 12, 12}, /* cost of loading fp registers
1950 in SFmode, DFmode and XFmode */
1951 {6, 6, 8}, /* cost of storing fp registers
1952 in SFmode, DFmode and XFmode */
1953 2, /* cost of moving MMX register */
1954 {8, 8}, /* cost of loading MMX registers
1955 in SImode and DImode */
1956 {8, 8}, /* cost of storing MMX registers
1957 in SImode and DImode */
1958 2, /* cost of moving SSE register */
1959 {8, 8, 8}, /* cost of loading SSE registers
1960 in SImode, DImode and TImode */
1961 {8, 8, 8}, /* cost of storing SSE registers
1962 in SImode, DImode and TImode */
1963 5, /* MMX or SSE register to integer */
1964 32, /* size of l1 cache. */
1965 512, /* size of l2 cache. */
1966 64, /* size of prefetch block */
1967 6, /* number of parallel prefetches */
1968 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1969 value is increased to the perhaps more appropriate value of 5. */
1970 3, /* Branch cost */
1971 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1972 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1973 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1974 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1975 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1976 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1977 generic_memcpy,
1978 generic_memset,
1979 1, /* scalar_stmt_cost. */
1980 1, /* scalar load_cost. */
1981 1, /* scalar_store_cost. */
1982 1, /* vec_stmt_cost. */
1983 1, /* vec_to_scalar_cost. */
1984 1, /* scalar_to_vec_cost. */
1985 1, /* vec_align_load_cost. */
1986 2, /* vec_unalign_load_cost. */
1987 1, /* vec_store_cost. */
1988 3, /* cond_taken_branch_cost. */
1989 1, /* cond_not_taken_branch_cost. */
1992 /* core_cost should produce code tuned for the Core family of CPUs. */
1993 static stringop_algs core_memcpy[2] = {
1994 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1995 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1996 {-1, libcall, false}}}};
1997 static stringop_algs core_memset[2] = {
1998 {libcall, {{6, loop_1_byte, true},
1999 {24, loop, true},
2000 {8192, rep_prefix_4_byte, true},
2001 {-1, libcall, false}}},
2002 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
2003 {-1, libcall, false}}}};
2005 static const
2006 struct processor_costs core_cost = {
2007 COSTS_N_INSNS (1), /* cost of an add instruction */
2008 /* On all chips taken into consideration, lea is 2 cycles or more. With
2009 this cost, however, our current implementation of synth_mult results in
2010 the use of unnecessary temporary registers, causing regressions on several
2011 SPECfp benchmarks. */
2012 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
2013 COSTS_N_INSNS (1), /* variable shift costs */
2014 COSTS_N_INSNS (1), /* constant shift costs */
2015 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
2016 COSTS_N_INSNS (4), /* HI */
2017 COSTS_N_INSNS (3), /* SI */
2018 COSTS_N_INSNS (4), /* DI */
2019 COSTS_N_INSNS (2)}, /* other */
2020 0, /* cost of multiply per each bit set */
2021 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
2022 COSTS_N_INSNS (26), /* HI */
2023 COSTS_N_INSNS (42), /* SI */
2024 COSTS_N_INSNS (74), /* DI */
2025 COSTS_N_INSNS (74)}, /* other */
2026 COSTS_N_INSNS (1), /* cost of movsx */
2027 COSTS_N_INSNS (1), /* cost of movzx */
2028 8, /* "large" insn */
2029 17, /* MOVE_RATIO */
2030 4, /* cost for loading QImode using movzbl */
2031 {4, 4, 4}, /* cost of loading integer registers
2032 in QImode, HImode and SImode.
2033 Relative to reg-reg move (2). */
2034 {4, 4, 4}, /* cost of storing integer registers */
2035 4, /* cost of reg,reg fld/fst */
2036 {12, 12, 12}, /* cost of loading fp registers
2037 in SFmode, DFmode and XFmode */
2038 {6, 6, 8}, /* cost of storing fp registers
2039 in SFmode, DFmode and XFmode */
2040 2, /* cost of moving MMX register */
2041 {8, 8}, /* cost of loading MMX registers
2042 in SImode and DImode */
2043 {8, 8}, /* cost of storing MMX registers
2044 in SImode and DImode */
2045 2, /* cost of moving SSE register */
2046 {8, 8, 8}, /* cost of loading SSE registers
2047 in SImode, DImode and TImode */
2048 {8, 8, 8}, /* cost of storing SSE registers
2049 in SImode, DImode and TImode */
2050 5, /* MMX or SSE register to integer */
2051 64, /* size of l1 cache. */
2052 512, /* size of l2 cache. */
2053 64, /* size of prefetch block */
2054 6, /* number of parallel prefetches */
2055 /* FIXME: perhaps a more appropriate value is 5. */
2056 3, /* Branch cost */
2057 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
2058 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
2059 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
2060 COSTS_N_INSNS (8), /* cost of FABS instruction. */
2061 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
2062 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
2063 core_memcpy,
2064 core_memset,
2065 1, /* scalar_stmt_cost. */
2066 1, /* scalar load_cost. */
2067 1, /* scalar_store_cost. */
2068 1, /* vec_stmt_cost. */
2069 1, /* vec_to_scalar_cost. */
2070 1, /* scalar_to_vec_cost. */
2071 1, /* vec_align_load_cost. */
2072 2, /* vec_unalign_load_cost. */
2073 1, /* vec_store_cost. */
2074 3, /* cond_taken_branch_cost. */
2075 1, /* cond_not_taken_branch_cost. */
2079 /* Set by -mtune. */
2080 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2082 /* Set by -mtune or -Os. */
2083 const struct processor_costs *ix86_cost = &pentium_cost;
2085 /* Processor feature/optimization bitmasks. */
2086 #define m_386 (1<<PROCESSOR_I386)
2087 #define m_486 (1<<PROCESSOR_I486)
2088 #define m_PENT (1<<PROCESSOR_PENTIUM)
2089 #define m_IAMCU (1<<PROCESSOR_IAMCU)
2090 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2091 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2092 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2093 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2094 #define m_CORE2 (1<<PROCESSOR_CORE2)
2095 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2096 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2097 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2098 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2099 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2100 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2101 #define m_KNL (1<<PROCESSOR_KNL)
2102 #define m_INTEL (1<<PROCESSOR_INTEL)
2104 #define m_GEODE (1<<PROCESSOR_GEODE)
2105 #define m_K6 (1<<PROCESSOR_K6)
2106 #define m_K6_GEODE (m_K6 | m_GEODE)
2107 #define m_K8 (1<<PROCESSOR_K8)
2108 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2109 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2110 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2111 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2112 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2113 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2114 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2115 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2116 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2117 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2118 #define m_BTVER (m_BTVER1 | m_BTVER2)
2119 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2121 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2123 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2124 #undef DEF_TUNE
2125 #define DEF_TUNE(tune, name, selector) name,
2126 #include "x86-tune.def"
2127 #undef DEF_TUNE
2130 /* Feature tests against the various tunings. */
2131 unsigned char ix86_tune_features[X86_TUNE_LAST];
2133 /* Feature tests against the various tunings used to create ix86_tune_features
2134 based on the processor mask. */
2135 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2136 #undef DEF_TUNE
2137 #define DEF_TUNE(tune, name, selector) selector,
2138 #include "x86-tune.def"
2139 #undef DEF_TUNE
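/* A sketch of how the DEF_TUNE X-macro above fills in both tables; the
   entry shown is hypothetical, not one taken from x86-tune.def:

     DEF_TUNE (X86_TUNE_EXAMPLE, "example", m_CORE_ALL | m_GENERIC)

   expands to "example", in ix86_tune_feature_names above and to
   m_CORE_ALL | m_GENERIC, in the selector array here, so the feature
   defaults to on for any -mtune target whose m_* bit is in the mask.  */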
2142 /* Feature tests against the various architecture variations. */
2143 unsigned char ix86_arch_features[X86_ARCH_LAST];
2145 /* Feature tests against the various architecture variations, used to create
2146 ix86_arch_features based on the processor mask. */
2147 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2148 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2149 ~(m_386 | m_486 | m_PENT | m_IAMCU | m_K6),
2151 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2152 ~m_386,
2154 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2155 ~(m_386 | m_486),
2157 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2158 ~m_386,
2160 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2161 ~m_386,
2164 /* If the average insn count for a single function invocation is
2165 lower than this constant, emit fast (but longer) prologue and
2166 epilogue code. */
2167 #define FAST_PROLOGUE_INSN_COUNT 20
2169 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2170 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2171 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2172 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2174 /* Array of the smallest class containing reg number REGNO, indexed by
2175 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2177 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2179 /* ax, dx, cx, bx */
2180 AREG, DREG, CREG, BREG,
2181 /* si, di, bp, sp */
2182 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2183 /* FP registers */
2184 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2185 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2186 /* arg pointer */
2187 NON_Q_REGS,
2188 /* flags, fpsr, fpcr, frame */
2189 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2190 /* SSE registers */
2191 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2192 SSE_REGS, SSE_REGS,
2193 /* MMX registers */
2194 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2195 MMX_REGS, MMX_REGS,
2196 /* REX registers */
2197 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2198 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2199 /* SSE REX registers */
2200 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2201 SSE_REGS, SSE_REGS,
2202 /* AVX-512 SSE registers */
2203 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2204 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2205 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2206 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2207 /* Mask registers. */
2208 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2209 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2210 /* MPX bound registers */
2211 BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2214 /* The "default" register map used in 32bit mode. */
2216 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2218 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2219 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2220 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2221 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2222 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2223 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2224 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2225 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2226 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2227 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2228 101, 102, 103, 104, /* bound registers */
2231 /* The "default" register map used in 64bit mode. */
2233 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2235 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2236 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2237 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2238 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2239 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2240 8,9,10,11,12,13,14,15, /* extended integer registers */
2241 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2242 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2243 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2244 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2245 126, 127, 128, 129, /* bound registers */
2248 /* Define the register numbers to be used in Dwarf debugging information.
2249 The SVR4 reference port C compiler uses the following register numbers
2250 in its Dwarf output code:
2251 0 for %eax (gcc regno = 0)
2252 1 for %ecx (gcc regno = 2)
2253 2 for %edx (gcc regno = 1)
2254 3 for %ebx (gcc regno = 3)
2255 4 for %esp (gcc regno = 7)
2256 5 for %ebp (gcc regno = 6)
2257 6 for %esi (gcc regno = 4)
2258 7 for %edi (gcc regno = 5)
2259 The following three DWARF register numbers are never generated by
2260 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2261 believes these numbers have these meanings.
2262 8 for %eip (no gcc equivalent)
2263 9 for %eflags (gcc regno = 17)
2264 10 for %trapno (no gcc equivalent)
2265 It is not at all clear how we should number the FP stack registers
2266 for the x86 architecture. If the version of SDB on x86/svr4 were
2267 a bit less brain dead with respect to floating-point then we would
2268 have a precedent to follow with respect to DWARF register numbers
2269 for x86 FP registers, but the SDB on x86/svr4 is so completely
2270 broken with respect to FP registers that it is hardly worth thinking
2271 of it as something to strive for compatibility with.
2272 The version of x86/svr4 SDB I have at the moment does (partially)
2273 seem to believe that DWARF register number 11 is associated with
2274 the x86 register %st(0), but that's about all. Higher DWARF
2275 register numbers don't seem to be associated with anything in
2276 particular, and even for DWARF regno 11, SDB only seems to under-
2277 stand that it should say that a variable lives in %st(0) (when
2278 asked via an `=' command) if we said it was in DWARF regno 11,
2279 but SDB still prints garbage when asked for the value of the
2280 variable in question (via a `/' command).
2281 (Also note that the labels SDB prints for various FP stack regs
2282 when doing an `x' command are all wrong.)
2283 Note that these problems generally don't affect the native SVR4
2284 C compiler because it doesn't allow the use of -O with -g and
2285 because when it is *not* optimizing, it allocates a memory
2286 location for each floating-point variable, and the memory
2287 location is what gets described in the DWARF AT_location
2288 attribute for the variable in question.
2289 Regardless of the severe mental illness of the x86/svr4 SDB, we
2290 do something sensible here and we use the following DWARF
2291 register numbers. Note that these are all stack-top-relative
2292 numbers.
2293 11 for %st(0) (gcc regno = 8)
2294 12 for %st(1) (gcc regno = 9)
2295 13 for %st(2) (gcc regno = 10)
2296 14 for %st(3) (gcc regno = 11)
2297 15 for %st(4) (gcc regno = 12)
2298 16 for %st(5) (gcc regno = 13)
2299 17 for %st(6) (gcc regno = 14)
2300 18 for %st(7) (gcc regno = 15)
2302 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2304 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2305 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2306 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2307 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2308 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2309 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2310 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2311 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2312 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2313 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2314 101, 102, 103, 104, /* bound registers */
2317 /* Define parameter passing and return registers. */
2319 static int const x86_64_int_parameter_registers[6] =
2321 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2324 static int const x86_64_ms_abi_int_parameter_registers[4] =
2326 CX_REG, DX_REG, R8_REG, R9_REG
2329 static int const x86_64_int_return_registers[4] =
2331 AX_REG, DX_REG, DI_REG, SI_REG
2334 /* Additional registers that are clobbered by SYSV calls. */
2336 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2338 SI_REG, DI_REG,
2339 XMM6_REG, XMM7_REG,
2340 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2341 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2344 /* Define the structure for the machine field in struct function. */
2346 struct GTY(()) stack_local_entry {
2347 unsigned short mode;
2348 unsigned short n;
2349 rtx rtl;
2350 struct stack_local_entry *next;
2353 /* Structure describing stack frame layout.
2354 Stack grows downward:
2356 [arguments]
2357 <- ARG_POINTER
2358 saved pc
2360 saved static chain if ix86_static_chain_on_stack
2362 saved frame pointer if frame_pointer_needed
2363 <- HARD_FRAME_POINTER
2364 [saved regs]
2365 <- regs_save_offset
2366 [padding0]
2368 [saved SSE regs]
2369 <- sse_regs_save_offset
2370 [padding1] |
2371 | <- FRAME_POINTER
2372 [va_arg registers] |
2374 [frame] |
2376 [padding2] | = to_allocate
2377 <- STACK_POINTER
2379 struct ix86_frame
2381 int nsseregs;
2382 int nregs;
2383 int va_arg_size;
2384 int red_zone_size;
2385 int outgoing_arguments_size;
2387 /* The offsets relative to ARG_POINTER. */
2388 HOST_WIDE_INT frame_pointer_offset;
2389 HOST_WIDE_INT hard_frame_pointer_offset;
2390 HOST_WIDE_INT stack_pointer_offset;
2391 HOST_WIDE_INT hfp_save_offset;
2392 HOST_WIDE_INT reg_save_offset;
2393 HOST_WIDE_INT sse_reg_save_offset;
2395 /* When save_regs_using_mov is set, emit prologue using
2396 move instead of push instructions. */
2397 bool save_regs_using_mov;
2400 /* Which CPU we are scheduling for. */
2401 enum attr_cpu ix86_schedule;
2403 /* Which CPU we are optimizing for. */
2404 enum processor_type ix86_tune;
2406 /* Which instruction set architecture to use. */
2407 enum processor_type ix86_arch;
2409 /* True if processor has SSE prefetch instruction. */
2410 unsigned char x86_prefetch_sse;
2412 /* -mstackrealign option */
2413 static const char ix86_force_align_arg_pointer_string[]
2414 = "force_align_arg_pointer";
2416 static rtx (*ix86_gen_leave) (void);
2417 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2418 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2419 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2420 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2421 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2422 static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2423 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2424 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2425 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2426 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2427 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2428 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2430 /* Preferred alignment for stack boundary in bits. */
2431 unsigned int ix86_preferred_stack_boundary;
2433 /* Alignment for the incoming stack boundary in bits, as specified on the
2434 command line. */
2435 static unsigned int ix86_user_incoming_stack_boundary;
2437 /* Default alignment for incoming stack boundary in bits. */
2438 static unsigned int ix86_default_incoming_stack_boundary;
2440 /* Alignment for incoming stack boundary in bits. */
2441 unsigned int ix86_incoming_stack_boundary;
2443 /* Calling-ABI-specific va_list type nodes. */
2444 static GTY(()) tree sysv_va_list_type_node;
2445 static GTY(()) tree ms_va_list_type_node;
2447 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2448 char internal_label_prefix[16];
2449 int internal_label_prefix_len;
2451 /* Fence to use after loop using movnt. */
2452 tree x86_mfence;
2454 /* Register class used for passing a given 64-bit part of the argument.
2455 These represent classes as documented by the psABI, with the exception
2456 of the SSESF and SSEDF classes, which are basically the SSE class; GCC just
2457 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2459 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2460 whenever possible (i.e. when the upper half contains only padding). */
2461 enum x86_64_reg_class
2463 X86_64_NO_CLASS,
2464 X86_64_INTEGER_CLASS,
2465 X86_64_INTEGERSI_CLASS,
2466 X86_64_SSE_CLASS,
2467 X86_64_SSESF_CLASS,
2468 X86_64_SSEDF_CLASS,
2469 X86_64_SSEUP_CLASS,
2470 X86_64_X87_CLASS,
2471 X86_64_X87UP_CLASS,
2472 X86_64_COMPLEX_X87_CLASS,
2473 X86_64_MEMORY_CLASS
2476 #define MAX_CLASSES 8
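/* An illustrative example (hedged; the actual classification is done by
   classify_argument later in this file): per the psABI, a
   struct { double d; int i; } spans two eightbytes, the first
   classified as an SSE class (SSEDF for the lone double) and the
   second as an integer class (INTEGERSI, since only its low half is
   used), so the struct is passed in one SSE register and one
   general-purpose register.  */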
2478 /* Table of constants used by fldpi, fldln2, etc.... */
2479 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2480 static bool ext_80387_constants_init = 0;
2483 static struct machine_function * ix86_init_machine_status (void);
2484 static rtx ix86_function_value (const_tree, const_tree, bool);
2485 static bool ix86_function_value_regno_p (const unsigned int);
2486 static unsigned int ix86_function_arg_boundary (machine_mode,
2487 const_tree);
2488 static rtx ix86_static_chain (const_tree, bool);
2489 static int ix86_function_regparm (const_tree, const_tree);
2490 static void ix86_compute_frame_layout (struct ix86_frame *);
2491 static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2492 rtx, rtx, int);
2493 static void ix86_add_new_builtins (HOST_WIDE_INT);
2494 static tree ix86_canonical_va_list_type (tree);
2495 static void predict_jump (int);
2496 static unsigned int split_stack_prologue_scratch_regno (void);
2497 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2499 enum ix86_function_specific_strings
2501 IX86_FUNCTION_SPECIFIC_ARCH,
2502 IX86_FUNCTION_SPECIFIC_TUNE,
2503 IX86_FUNCTION_SPECIFIC_MAX
2506 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2507 const char *, enum fpmath_unit, bool);
2508 static void ix86_function_specific_save (struct cl_target_option *,
2509 struct gcc_options *opts);
2510 static void ix86_function_specific_restore (struct gcc_options *opts,
2511 struct cl_target_option *);
2512 static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2513 static void ix86_function_specific_print (FILE *, int,
2514 struct cl_target_option *);
2515 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2516 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2517 struct gcc_options *,
2518 struct gcc_options *,
2519 struct gcc_options *);
2520 static bool ix86_can_inline_p (tree, tree);
2521 static void ix86_set_current_function (tree);
2522 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2524 static enum calling_abi ix86_function_abi (const_tree);
2527 #ifndef SUBTARGET32_DEFAULT_CPU
2528 #define SUBTARGET32_DEFAULT_CPU "i386"
2529 #endif
2531 /* Whether -mtune= or -march= were specified */
2532 static int ix86_tune_defaulted;
2533 static int ix86_arch_specified;
2535 /* Vectorization library interface and handlers. */
2536 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2538 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2539 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2541 /* Processor target table, indexed by processor number */
2542 struct ptt
2544 const char *const name; /* processor name */
2545 const struct processor_costs *cost; /* Processor costs */
2546 const int align_loop; /* Default alignments. */
2547 const int align_loop_max_skip;
2548 const int align_jump;
2549 const int align_jump_max_skip;
2550 const int align_func;
2553 /* This table must be in sync with enum processor_type in i386.h. */
2554 static const struct ptt processor_target_table[PROCESSOR_max] =
2556 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2557 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2558 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2559 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2560 {"iamcu", &iamcu_cost, 16, 7, 16, 7, 16},
2561 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2562 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2563 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2564 {"core2", &core_cost, 16, 10, 16, 10, 16},
2565 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2566 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2567 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2568 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2569 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2570 {"knl", &slm_cost, 16, 15, 16, 7, 16},
2571 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2572 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2573 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2574 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2575 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2576 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2577 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2578 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2579 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2580 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2581 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2582 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2585 static unsigned int
2586 rest_of_handle_insert_vzeroupper (void)
2588 int i;
2590 /* vzeroupper instructions are inserted immediately after reload to
2591 account for possible spills from 256-bit registers. The pass
2592 reuses the mode switching infrastructure by re-running the mode insertion
2593 pass, so disable entities that have already been processed. */
2594 for (i = 0; i < MAX_386_ENTITIES; i++)
2595 ix86_optimize_mode_switching[i] = 0;
2597 ix86_optimize_mode_switching[AVX_U128] = 1;
2599 /* Call optimize_mode_switching. */
2600 g->get_passes ()->execute_pass_mode_switching ();
2601 return 0;
2604 namespace {
2606 const pass_data pass_data_insert_vzeroupper =
2608 RTL_PASS, /* type */
2609 "vzeroupper", /* name */
2610 OPTGROUP_NONE, /* optinfo_flags */
2611 TV_NONE, /* tv_id */
2612 0, /* properties_required */
2613 0, /* properties_provided */
2614 0, /* properties_destroyed */
2615 0, /* todo_flags_start */
2616 TODO_df_finish, /* todo_flags_finish */
2619 class pass_insert_vzeroupper : public rtl_opt_pass
2621 public:
2622 pass_insert_vzeroupper(gcc::context *ctxt)
2623 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2626 /* opt_pass methods: */
2627 virtual bool gate (function *)
2629 return TARGET_AVX && !TARGET_AVX512F
2630 && TARGET_VZEROUPPER && flag_expensive_optimizations
2631 && !optimize_size;
2634 virtual unsigned int execute (function *)
2636 return rest_of_handle_insert_vzeroupper ();
2639 }; // class pass_insert_vzeroupper
2641 } // anon namespace
2643 rtl_opt_pass *
2644 make_pass_insert_vzeroupper (gcc::context *ctxt)
2646 return new pass_insert_vzeroupper (ctxt);
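/* A hedged sketch of how a factory like the one above is typically
   hooked into the pass manager; the reference pass name and position
   here are illustrative assumptions, not a statement of what i386
   actually registers:

     opt_pass *p = make_pass_insert_vzeroupper (g);
     struct register_pass_info info
       = { p, "reload", 1, PASS_POS_INSERT_AFTER };
     register_pass (&info);

   This would run the pass once, right after reload, consistent with
   the comment in rest_of_handle_insert_vzeroupper above.  */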
2649 /* Return true if a red-zone is in use. */
2651 static inline bool
2652 ix86_using_red_zone (void)
2654 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2657 /* Return a string that documents the current -m options. The caller is
2658 responsible for freeing the string. */
2660 static char *
2661 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2662 const char *tune, enum fpmath_unit fpmath,
2663 bool add_nl_p)
2665 struct ix86_target_opts
2667 const char *option; /* option string */
2668 HOST_WIDE_INT mask; /* isa mask options */
2671 /* This table is ordered so that options like -msse4.2 that imply
2672 preceding options are matched first. */
2673 static struct ix86_target_opts isa_opts[] =
2675 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2676 { "-mfma", OPTION_MASK_ISA_FMA },
2677 { "-mxop", OPTION_MASK_ISA_XOP },
2678 { "-mlwp", OPTION_MASK_ISA_LWP },
2679 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2680 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2681 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2682 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2683 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2684 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2685 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2686 { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA },
2687 { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI },
2688 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2689 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2690 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2691 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2692 { "-msse3", OPTION_MASK_ISA_SSE3 },
2693 { "-msse2", OPTION_MASK_ISA_SSE2 },
2694 { "-msse", OPTION_MASK_ISA_SSE },
2695 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2696 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2697 { "-mmmx", OPTION_MASK_ISA_MMX },
2698 { "-mabm", OPTION_MASK_ISA_ABM },
2699 { "-mbmi", OPTION_MASK_ISA_BMI },
2700 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2701 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2702 { "-mhle", OPTION_MASK_ISA_HLE },
2703 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2704 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2705 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2706 { "-madx", OPTION_MASK_ISA_ADX },
2707 { "-mtbm", OPTION_MASK_ISA_TBM },
2708 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2709 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2710 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2711 { "-maes", OPTION_MASK_ISA_AES },
2712 { "-msha", OPTION_MASK_ISA_SHA },
2713 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2714 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2715 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2716 { "-mf16c", OPTION_MASK_ISA_F16C },
2717 { "-mrtm", OPTION_MASK_ISA_RTM },
2718 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2719 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2720 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2721 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2722 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2723 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2724 { "-mmpx", OPTION_MASK_ISA_MPX },
2725 { "-mclwb", OPTION_MASK_ISA_CLWB },
2726 { "-mpcommit", OPTION_MASK_ISA_PCOMMIT },
2727 { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
2730 /* Flag options. */
2731 static struct ix86_target_opts flag_opts[] =
2733 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2734 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2735 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2736 { "-m80387", MASK_80387 },
2737 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2738 { "-malign-double", MASK_ALIGN_DOUBLE },
2739 { "-mcld", MASK_CLD },
2740 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2741 { "-mieee-fp", MASK_IEEE_FP },
2742 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2743 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2744 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2745 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2746 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2747 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2748 { "-mno-red-zone", MASK_NO_RED_ZONE },
2749 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2750 { "-mrecip", MASK_RECIP },
2751 { "-mrtd", MASK_RTD },
2752 { "-msseregparm", MASK_SSEREGPARM },
2753 { "-mstack-arg-probe", MASK_STACK_PROBE },
2754 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2755 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2756 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2757 { "-mvzeroupper", MASK_VZEROUPPER },
2758 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2759 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2760 { "-mprefer-avx128", MASK_PREFER_AVX128},
2763 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2765 char isa_other[40];
2766 char target_other[40];
2767 unsigned num = 0;
2768 unsigned i, j;
2769 char *ret;
2770 char *ptr;
2771 size_t len;
2772 size_t line_len;
2773 size_t sep_len;
2774 const char *abi;
2776 memset (opts, '\0', sizeof (opts));
2778 /* Add -march= option. */
2779 if (arch)
2781 opts[num][0] = "-march=";
2782 opts[num++][1] = arch;
2785 /* Add -mtune= option. */
2786 if (tune)
2788 opts[num][0] = "-mtune=";
2789 opts[num++][1] = tune;
2792 /* Add -m32/-m64/-mx32. */
2793 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2795 if ((isa & OPTION_MASK_ABI_64) != 0)
2796 abi = "-m64";
2797 else
2798 abi = "-mx32";
2799 isa &= ~ (OPTION_MASK_ISA_64BIT
2800 | OPTION_MASK_ABI_64
2801 | OPTION_MASK_ABI_X32);
2803 else
2804 abi = "-m32";
2805 opts[num++][0] = abi;
2807 /* Pick out the options in isa options. */
2808 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2810 if ((isa & isa_opts[i].mask) != 0)
2812 opts[num++][0] = isa_opts[i].option;
2813 isa &= ~ isa_opts[i].mask;
2817 if (isa && add_nl_p)
2819 opts[num++][0] = isa_other;
2820 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2821 isa);
2824 /* Add flag options. */
2825 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2827 if ((flags & flag_opts[i].mask) != 0)
2829 opts[num++][0] = flag_opts[i].option;
2830 flags &= ~ flag_opts[i].mask;
2834 if (flags && add_nl_p)
2836 opts[num++][0] = target_other;
2837 sprintf (target_other, "(other flags: %#x)", flags);
2840 /* Add -fpmath= option. */
2841 if (fpmath)
2843 opts[num][0] = "-mfpmath=";
2844 switch ((int) fpmath)
2846 case FPMATH_387:
2847 opts[num++][1] = "387";
2848 break;
2850 case FPMATH_SSE:
2851 opts[num++][1] = "sse";
2852 break;
2854 case FPMATH_387 | FPMATH_SSE:
2855 opts[num++][1] = "sse+387";
2856 break;
2858 default:
2859 gcc_unreachable ();
2863 /* Any options? */
2864 if (num == 0)
2865 return NULL;
2867 gcc_assert (num < ARRAY_SIZE (opts));
2869 /* Size the string. */
2870 len = 0;
2871 sep_len = (add_nl_p) ? 3 : 1;
2872 for (i = 0; i < num; i++)
2874 len += sep_len;
2875 for (j = 0; j < 2; j++)
2876 if (opts[i][j])
2877 len += strlen (opts[i][j]);
2880 /* Build the string. */
2881 ret = ptr = (char *) xmalloc (len);
2882 line_len = 0;
2884 for (i = 0; i < num; i++)
2886 size_t len2[2];
2888 for (j = 0; j < 2; j++)
2889 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2891 if (i != 0)
2893 *ptr++ = ' ';
2894 line_len++;
2896 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2898 *ptr++ = '\\';
2899 *ptr++ = '\n';
2900 line_len = 0;
2904 for (j = 0; j < 2; j++)
2905 if (opts[i][j])
2907 memcpy (ptr, opts[i][j], len2[j]);
2908 ptr += len2[j];
2909 line_len += len2[j];
2913 *ptr = '\0';
2914 gcc_assert (ret + len >= ptr);
2916 return ret;
2919 /* Return true if profiling code should be emitted before the
2920 prologue, and false otherwise.
2921 Note: for x86 with "hotfix" it is sorried (i.e. reported as unimplemented). */
2922 static bool
2923 ix86_profile_before_prologue (void)
2925 return flag_fentry != 0;
2928 /* Function that is callable from the debugger to print the current
2929 options. */
2930 void ATTRIBUTE_UNUSED
2931 ix86_debug_options (void)
2933 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2934 ix86_arch_string, ix86_tune_string,
2935 ix86_fpmath, true);
2937 if (opts)
2939 fprintf (stderr, "%s\n\n", opts);
2940 free (opts);
2942 else
2943 fputs ("<no options>\n\n", stderr);
2945 return;
2948 static const char *stringop_alg_names[] = {
2949 #define DEF_ENUM
2950 #define DEF_ALG(alg, name) #name,
2951 #include "stringop.def"
2952 #undef DEF_ENUM
2953 #undef DEF_ALG
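/* A sketch of the expansion above (the entry is hypothetical as to its
   exact form in stringop.def): because DEF_ENUM is defined to nothing,
   only DEF_ALG entries contribute, and an entry such as
   DEF_ALG (rep_prefix_4_byte, rep_prefix_4_byte) expands to the string
   "rep_prefix_4_byte", in this array.  */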
2956 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2957 The string has the following form (or is a comma-separated list of such entries):
2959 strategy_alg:max_size:[align|noalign]
2961 where the full size range for the strategy is either [0, max_size] or
2962 [min_size, max_size], in which min_size is the max_size + 1 of the
2963 preceding range. The last size range must have max_size == -1.
2965 Examples:
2968 -mmemcpy-strategy=libcall:-1:noalign
2970 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2974 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2976 This tells the compiler to use the following strategy for memset:
2977 1) when the expected size is between [1, 16], use rep_8byte strategy;
2978 2) when the size is between [17, 2048], use vector_loop;
2979 3) when the size is > 2048, use libcall. */
2981 struct stringop_size_range
2983 int max;
2984 stringop_alg alg;
2985 bool noalign;
2988 static void
2989 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2991 const struct stringop_algs *default_algs;
2992 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2993 char *curr_range_str, *next_range_str;
2994 int i = 0, n = 0;
2996 if (is_memset)
2997 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2998 else
2999 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
3001 curr_range_str = strategy_str;
3005 int maxs;
3006 char alg_name[128];
3007 char align[16];
3008 next_range_str = strchr (curr_range_str, ',');
3009 if (next_range_str)
3010 *next_range_str++ = '\0';
3012 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
3013 alg_name, &maxs, align))
3015 error ("wrong arg %s to option %s", curr_range_str,
3016 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3017 return;
3020 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
3022 error ("size ranges of option %s should be increasing",
3023 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3024 return;
3027 for (i = 0; i < last_alg; i++)
3028 if (!strcmp (alg_name, stringop_alg_names[i]))
3029 break;
3031 if (i == last_alg)
3033 error ("wrong stringop strategy name %s specified for option %s",
3034 alg_name,
3035 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3036 return;
3039 if ((stringop_alg) i == rep_prefix_8_byte
3040 && !TARGET_64BIT)
3042 /* rep; movq isn't available in 32-bit code. */
3043 error ("stringop strategy name %s specified for option %s "
3044 "not supported for 32-bit code",
3045 alg_name,
3046 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3047 return;
3050 input_ranges[n].max = maxs;
3051 input_ranges[n].alg = (stringop_alg) i;
3052 if (!strcmp (align, "align"))
3053 input_ranges[n].noalign = false;
3054 else if (!strcmp (align, "noalign"))
3055 input_ranges[n].noalign = true;
3056 else
3058 error ("unknown alignment %s specified for option %s",
3059 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3060 return;
3062 n++;
3063 curr_range_str = next_range_str;
3065 while (curr_range_str);
3067 if (input_ranges[n - 1].max != -1)
3069 error ("the max value for the last size range should be -1"
3070 " for option %s",
3071 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3072 return;
3075 if (n > MAX_STRINGOP_ALGS)
3077 error ("too many size ranges specified in option %s",
3078 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3079 return;
3082 /* Now override the default algs array. */
3083 for (i = 0; i < n; i++)
3085 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3086 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3087 = input_ranges[i].alg;
3088 *const_cast<int *>(&default_algs->size[i].noalign)
3089 = input_ranges[i].noalign;
3094 /* Parse the -mtune-ctrl= option.  When DUMP is true,
3095 print the features that are explicitly set.  */
3097 static void
3098 parse_mtune_ctrl_str (bool dump)
3100 if (!ix86_tune_ctrl_string)
3101 return;
3103 char *next_feature_string = NULL;
3104 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3105 char *orig = curr_feature_string;
3106 int i;
3109 bool clear = false;
3111 next_feature_string = strchr (curr_feature_string, ',');
3112 if (next_feature_string)
3113 *next_feature_string++ = '\0';
3114 if (*curr_feature_string == '^')
3116 curr_feature_string++;
3117 clear = true;
3119 for (i = 0; i < X86_TUNE_LAST; i++)
3121 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3123 ix86_tune_features[i] = !clear;
3124 if (dump)
3125 fprintf (stderr, "Explicitly %s feature %s\n",
3126 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3127 break;
3130 if (i == X86_TUNE_LAST)
3131 error ("Unknown parameter to option -mtune-ctrl: %s",
3132 clear ? curr_feature_string - 1 : curr_feature_string);
3133 curr_feature_string = next_feature_string;
3135 while (curr_feature_string);
3136 free (orig);
3139 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3140 processor type. */
3142 static void
3143 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3145 unsigned int ix86_tune_mask = 1u << ix86_tune;
3146 int i;
3148 for (i = 0; i < X86_TUNE_LAST; ++i)
3150 if (ix86_tune_no_default)
3151 ix86_tune_features[i] = 0;
3152 else
3153 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3156 if (dump)
3158 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3159 for (i = 0; i < X86_TUNE_LAST; i++)
3160 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3161 ix86_tune_features[i] ? "on" : "off");
3164 parse_mtune_ctrl_str (dump);
3168 /* Default align_* from the processor table. */
3170 static void
3171 ix86_default_align (struct gcc_options *opts)
3173 if (opts->x_align_loops == 0)
3175 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3176 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3178 if (opts->x_align_jumps == 0)
3180 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3181 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3183 if (opts->x_align_functions == 0)
3185 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3189 /* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
3191 static void
3192 ix86_override_options_after_change (void)
3194 ix86_default_align (&global_options);
3197 /* Override various settings based on options. If MAIN_ARGS_P, the
3198 options are from the command line, otherwise they are from
3199 attributes. */
3201 static void
3202 ix86_option_override_internal (bool main_args_p,
3203 struct gcc_options *opts,
3204 struct gcc_options *opts_set)
3206 int i;
3207 unsigned int ix86_arch_mask;
3208 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3209 const char *prefix;
3210 const char *suffix;
3211 const char *sw;
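/* Feature bits for the "flags" field of processor_alias_table below;
   each PTA_* bit names an ISA extension implied by the corresponding
   -march= processor.  */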
3213 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3214 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3215 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3216 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3217 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3218 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3219 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3220 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3221 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3222 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3223 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3224 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3225 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3226 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3227 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3228 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3229 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3230 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3231 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3232 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3233 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3234 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3235 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3236 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3237 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3238 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3239 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3240 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3241 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3242 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3243 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3244 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3245 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3246 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3247 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3248 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3249 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3250 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3251 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3252 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3253 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3254 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3255 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3256 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3257 #define PTA_MPX (HOST_WIDE_INT_1 << 44)
3258 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3259 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3260 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3261 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3262 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3263 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3264 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3265 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3266 #define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53)
3267 #define PTA_AVX512VBMI (HOST_WIDE_INT_1 << 54)
3268 #define PTA_CLWB (HOST_WIDE_INT_1 << 55)
3269 #define PTA_PCOMMIT (HOST_WIDE_INT_1 << 56)
3270 #define PTA_MWAITX (HOST_WIDE_INT_1 << 57)
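/* Composite feature sets used by the processor alias table below;
   later entries build on earlier ones.  */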
3272 #define PTA_CORE2 \
3273 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3274 | PTA_CX16 | PTA_FXSR)
3275 #define PTA_NEHALEM \
3276 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3277 #define PTA_WESTMERE \
3278 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3279 #define PTA_SANDYBRIDGE \
3280 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3281 #define PTA_IVYBRIDGE \
3282 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3283 #define PTA_HASWELL \
3284 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3285 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3286 #define PTA_BROADWELL \
3287 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3288 #define PTA_KNL \
3289 (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3290 #define PTA_BONNELL \
3291 (PTA_CORE2 | PTA_MOVBE)
3292 #define PTA_SILVERMONT \
3293 (PTA_WESTMERE | PTA_MOVBE)
3295 /* If this reaches 64, we need to widen the struct pta flags field below.  */
3297 static struct pta
3299 const char *const name; /* processor name or nickname. */
3300 const enum processor_type processor;
3301 const enum attr_cpu schedule;
3302 const unsigned HOST_WIDE_INT flags;
3304 const processor_alias_table[] =
3306 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3307 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3308 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3309 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3310 {"iamcu", PROCESSOR_IAMCU, CPU_PENTIUM, 0},
3311 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3312 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3313 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3314 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3315 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3316 PTA_MMX | PTA_SSE | PTA_FXSR},
3317 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3318 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3319 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3320 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3321 PTA_MMX | PTA_SSE | PTA_FXSR},
3322 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3323 PTA_MMX | PTA_SSE | PTA_FXSR},
3324 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3325 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3326 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3327 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3328 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3329 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3330 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3331 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3332 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3333 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3334 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3335 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3336 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3337 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3338 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3339 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3340 PTA_SANDYBRIDGE},
3341 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3342 PTA_SANDYBRIDGE},
3343 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3344 PTA_IVYBRIDGE},
3345 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3346 PTA_IVYBRIDGE},
3347 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3348 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3349 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3350 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3351 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3352 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3353 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3354 {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3355 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3356 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3357 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3358 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3359 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3360 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3361 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3362 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3363 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3364 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3365 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3366 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3367 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3368 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3369 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3370 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3371 {"x86-64", PROCESSOR_K8, CPU_K8,
3372 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3373 {"k8", PROCESSOR_K8, CPU_K8,
3374 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3375 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3376 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3377 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3378 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3379 {"opteron", PROCESSOR_K8, CPU_K8,
3380 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3381 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3382 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3383 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3384 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3385 {"athlon64", PROCESSOR_K8, CPU_K8,
3386 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3387 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3388 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3389 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3390 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3391 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3392 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3393 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3394 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3395 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3396 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3397 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3398 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3399 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3400 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3401 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3402 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3403 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3404 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3405 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3406 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3407 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3408 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3409 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3410 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3411 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3412 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3413 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3414 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3415 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3416 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3417 | PTA_XSAVEOPT | PTA_FSGSBASE},
3418 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3419 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3420 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3421 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3422 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3423 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3424 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3425 | PTA_MOVBE | PTA_MWAITX},
3426 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3427 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3428 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3429 | PTA_FXSR | PTA_XSAVE},
3430 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3431 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3432 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3433 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3434 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3435 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3437 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3438 PTA_64BIT
3439 | PTA_HLE /* flags are only used for -march switch. */ },
3442 /* -mrecip options. */
3443 static struct
3445 const char *string; /* option name */
3446 unsigned int mask; /* mask bits to set */
3448 const recip_options[] =
3450 { "all", RECIP_MASK_ALL },
3451 { "none", RECIP_MASK_NONE },
3452 { "div", RECIP_MASK_DIV },
3453 { "sqrt", RECIP_MASK_SQRT },
3454 { "vec-div", RECIP_MASK_VEC_DIV },
3455 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3458 int const pta_size = ARRAY_SIZE (processor_alias_table);
3460 /* Set up prefix/suffix so the error messages refer to either the command
3461 line argument, or the attribute(target). */
3462 if (main_args_p)
3464 prefix = "-m";
3465 suffix = "";
3466 sw = "switch";
3468 else
3470 prefix = "option(\"";
3471 suffix = "\")";
3472 sw = "attribute";
3475 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3476 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3477 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3478 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3479 #ifdef TARGET_BI_ARCH
3480 else
3482 #if TARGET_BI_ARCH == 1
3483 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3484 is on and OPTION_MASK_ABI_X32 is off. We turn off
3485 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3486 -mx32. */
3487 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3488 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3489 #else
3490 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3491 on and OPTION_MASK_ABI_64 is off. We turn off
3492 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3493 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3494 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3495 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3496 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3497 #endif
3498 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3499 && TARGET_IAMCU_P (opts->x_target_flags))
3500 sorry ("Intel MCU psABI isn%'t supported in %s mode",
3501 TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit");
3503 #endif
3505 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3507 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3508 OPTION_MASK_ABI_64 for TARGET_X32. */
3509 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3510 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3512 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3513 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3514 | OPTION_MASK_ABI_X32
3515 | OPTION_MASK_ABI_64);
3516 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3518 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3519 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3520 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3521 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3524 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3525 SUBTARGET_OVERRIDE_OPTIONS;
3526 #endif
3528 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3529 SUBSUBTARGET_OVERRIDE_OPTIONS;
3530 #endif
3532 /* -fPIC is the default for x86_64. */
3533 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3534 opts->x_flag_pic = 2;
3536 /* Need to check -mtune=generic first. */
3537 if (opts->x_ix86_tune_string)
3539 /* As special support for cross compilers we read -mtune=native
3540 as -mtune=generic. With native compilers we won't see the
3541 -mtune=native, as it was changed by the driver. */
3542 if (!strcmp (opts->x_ix86_tune_string, "native"))
3544 opts->x_ix86_tune_string = "generic";
3546 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3547 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3548 "%stune=k8%s or %stune=generic%s instead as appropriate",
3549 prefix, suffix, prefix, suffix, prefix, suffix);
3551 else
3553 if (opts->x_ix86_arch_string)
3554 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3555 if (!opts->x_ix86_tune_string)
3557 opts->x_ix86_tune_string
3558 = processor_target_table[TARGET_CPU_DEFAULT].name;
3559 ix86_tune_defaulted = 1;
3562 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3563 or defaulted. We need to use a sensible tune option. */
3564 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3566 opts->x_ix86_tune_string = "generic";
3570 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3571 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3573 /* rep; movq isn't available in 32-bit code. */
3574 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3575 opts->x_ix86_stringop_alg = no_stringop;
3578 if (!opts->x_ix86_arch_string)
3579 opts->x_ix86_arch_string
3580 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3581 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3582 else
3583 ix86_arch_specified = 1;
3585 if (opts_set->x_ix86_pmode)
3587 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3588 && opts->x_ix86_pmode == PMODE_SI)
3589 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3590 && opts->x_ix86_pmode == PMODE_DI))
3591 error ("address mode %qs not supported in the %s bit mode",
3592 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3593 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3595 else
3596 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3597 ? PMODE_DI : PMODE_SI;
3599 if (!opts_set->x_ix86_abi)
3600 opts->x_ix86_abi = DEFAULT_ABI;
3602 /* For targets using the MS ABI, enable ms-extensions if not
3603 explicitly turned off.  For non-MS ABI targets, turn this
3604 option off.  */
3605 if (!opts_set->x_flag_ms_extensions)
3606 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3608 if (opts_set->x_ix86_cmodel)
3610 switch (opts->x_ix86_cmodel)
3612 case CM_SMALL:
3613 case CM_SMALL_PIC:
3614 if (opts->x_flag_pic)
3615 opts->x_ix86_cmodel = CM_SMALL_PIC;
3616 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3617 error ("code model %qs not supported in the %s bit mode",
3618 "small", "32");
3619 break;
3621 case CM_MEDIUM:
3622 case CM_MEDIUM_PIC:
3623 if (opts->x_flag_pic)
3624 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3625 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3626 error ("code model %qs not supported in the %s bit mode",
3627 "medium", "32");
3628 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3629 error ("code model %qs not supported in x32 mode",
3630 "medium");
3631 break;
3633 case CM_LARGE:
3634 case CM_LARGE_PIC:
3635 if (opts->x_flag_pic)
3636 opts->x_ix86_cmodel = CM_LARGE_PIC;
3637 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3638 error ("code model %qs not supported in the %s bit mode",
3639 "large", "32");
3640 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3641 error ("code model %qs not supported in x32 mode",
3642 "large");
3643 break;
3645 case CM_32:
3646 if (opts->x_flag_pic)
3647 error ("code model %s does not support PIC mode", "32");
3648 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3649 error ("code model %qs not supported in the %s bit mode",
3650 "32", "64");
3651 break;
3653 case CM_KERNEL:
3654 if (opts->x_flag_pic)
3656 error ("code model %s does not support PIC mode", "kernel");
3657 opts->x_ix86_cmodel = CM_32;
3659 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3660 error ("code model %qs not supported in the %s bit mode",
3661 "kernel", "32");
3662 break;
3664 default:
3665 gcc_unreachable ();
3668 else
3670 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3671 use of rip-relative addressing. This eliminates fixups that
3672 would otherwise be needed if this object is to be placed in a
3673 DLL, and is essentially just as efficient as direct addressing. */
3674 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3675 && (TARGET_RDOS || TARGET_PECOFF))
3676 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3677 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3678 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3679 else
3680 opts->x_ix86_cmodel = CM_32;
3682 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3684 error ("-masm=intel not supported in this configuration");
3685 opts->x_ix86_asm_dialect = ASM_ATT;
3687 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3688 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3689 sorry ("%i-bit mode not compiled in",
3690 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
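/* Look up -march= in the alias table: record the schedule and
   architecture, and turn on every ISA flag the selected processor
   implies unless the user has set that flag explicitly.  */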
3692 for (i = 0; i < pta_size; i++)
3693 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3695 ix86_schedule = processor_alias_table[i].schedule;
3696 ix86_arch = processor_alias_table[i].processor;
3697 /* Default cpu tuning to the architecture. */
3698 ix86_tune = ix86_arch;
3700 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3701 && !(processor_alias_table[i].flags & PTA_64BIT))
3702 error ("CPU you selected does not support x86-64 "
3703 "instruction set");
3705 if (processor_alias_table[i].flags & PTA_MMX
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3708 if (processor_alias_table[i].flags & PTA_3DNOW
3709 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3710 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3711 if (processor_alias_table[i].flags & PTA_3DNOW_A
3712 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3713 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3714 if (processor_alias_table[i].flags & PTA_SSE
3715 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3716 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3717 if (processor_alias_table[i].flags & PTA_SSE2
3718 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3719 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3720 if (processor_alias_table[i].flags & PTA_SSE3
3721 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3722 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3723 if (processor_alias_table[i].flags & PTA_SSSE3
3724 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3725 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3726 if (processor_alias_table[i].flags & PTA_SSE4_1
3727 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3728 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3729 if (processor_alias_table[i].flags & PTA_SSE4_2
3730 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3731 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3732 if (processor_alias_table[i].flags & PTA_AVX
3733 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3734 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3735 if (processor_alias_table[i].flags & PTA_AVX2
3736 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3737 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3738 if (processor_alias_table[i].flags & PTA_FMA
3739 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3740 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3741 if (processor_alias_table[i].flags & PTA_SSE4A
3742 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3743 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3744 if (processor_alias_table[i].flags & PTA_FMA4
3745 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3746 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3747 if (processor_alias_table[i].flags & PTA_XOP
3748 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3749 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3750 if (processor_alias_table[i].flags & PTA_LWP
3751 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3752 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3753 if (processor_alias_table[i].flags & PTA_ABM
3754 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3755 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3756 if (processor_alias_table[i].flags & PTA_BMI
3757 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3758 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3759 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3760 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3761 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3762 if (processor_alias_table[i].flags & PTA_TBM
3763 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3764 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3765 if (processor_alias_table[i].flags & PTA_BMI2
3766 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3767 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3768 if (processor_alias_table[i].flags & PTA_CX16
3769 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3770 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3771 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3772 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3773 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3774 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3775 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3776 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3777 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3778 if (processor_alias_table[i].flags & PTA_MOVBE
3779 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3780 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3781 if (processor_alias_table[i].flags & PTA_AES
3782 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3783 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3784 if (processor_alias_table[i].flags & PTA_SHA
3785 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3786 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3787 if (processor_alias_table[i].flags & PTA_PCLMUL
3788 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3789 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3790 if (processor_alias_table[i].flags & PTA_FSGSBASE
3791 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3792 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3793 if (processor_alias_table[i].flags & PTA_RDRND
3794 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3795 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3796 if (processor_alias_table[i].flags & PTA_F16C
3797 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3798 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3799 if (processor_alias_table[i].flags & PTA_RTM
3800 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3801 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3802 if (processor_alias_table[i].flags & PTA_HLE
3803 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3804 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3805 if (processor_alias_table[i].flags & PTA_PRFCHW
3806 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3807 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3808 if (processor_alias_table[i].flags & PTA_RDSEED
3809 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3810 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3811 if (processor_alias_table[i].flags & PTA_ADX
3812 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3813 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3814 if (processor_alias_table[i].flags & PTA_FXSR
3815 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3816 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3817 if (processor_alias_table[i].flags & PTA_XSAVE
3818 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3819 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3820 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3821 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3822 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3823 if (processor_alias_table[i].flags & PTA_AVX512F
3824 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3825 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3826 if (processor_alias_table[i].flags & PTA_AVX512ER
3827 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3828 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3829 if (processor_alias_table[i].flags & PTA_AVX512PF
3830 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3831 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3832 if (processor_alias_table[i].flags & PTA_AVX512CD
3833 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3834 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3835 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3836 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3837 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3838 if (processor_alias_table[i].flags & PTA_PCOMMIT
3839 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3840 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3841 if (processor_alias_table[i].flags & PTA_CLWB
3842 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3843 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3844 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3845 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3846 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3847 if (processor_alias_table[i].flags & PTA_XSAVEC
3848 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3849 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3850 if (processor_alias_table[i].flags & PTA_XSAVES
3851 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3852 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3853 if (processor_alias_table[i].flags & PTA_AVX512DQ
3854 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3855 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3856 if (processor_alias_table[i].flags & PTA_AVX512BW
3857 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3858 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3859 if (processor_alias_table[i].flags & PTA_AVX512VL
3860 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3861 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3862 if (processor_alias_table[i].flags & PTA_MPX
3863 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3864 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3865 if (processor_alias_table[i].flags & PTA_AVX512VBMI
3866 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3867 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3868 if (processor_alias_table[i].flags & PTA_AVX512IFMA
3869 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3870 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3871 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3872 x86_prefetch_sse = true;
3873 if (processor_alias_table[i].flags & PTA_MWAITX
3874 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
3875 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
3877 break;
3880 if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3881 error ("Intel MPX does not support x32");
3886 if (TARGET_IAMCU_P (opts->x_target_flags))
3888 /* Verify that x87/MMX/SSE/AVX is off for -miamcu. */
3889 if (TARGET_80387_P (opts->x_target_flags))
3890 sorry ("X87 FPU isn%'t supported in Intel MCU psABI");
3891 else if ((opts->x_ix86_isa_flags & (OPTION_MASK_ISA_MMX
3892 | OPTION_MASK_ISA_SSE
3893 | OPTION_MASK_ISA_AVX)))
3894 sorry ("%s isn%'t supported in Intel MCU psABI",
3895 TARGET_MMX_P (opts->x_ix86_isa_flags)
3896 ? "MMX"
3897 : TARGET_SSE_P (opts->x_ix86_isa_flags) ? "SSE" : "AVX");
3900 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3901 error ("generic CPU can be used only for %stune=%s %s",
3902 prefix, suffix, sw);
3903 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3904 error ("intel CPU can be used only for %stune=%s %s",
3905 prefix, suffix, sw);
3906 else if (i == pta_size)
3907 error ("bad value (%s) for %sarch=%s %s",
3908 opts->x_ix86_arch_string, prefix, suffix, sw);
3910 ix86_arch_mask = 1u << ix86_arch;
3911 for (i = 0; i < X86_ARCH_LAST; ++i)
3912 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
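/* Likewise look up -mtune= to pick the scheduling model, falling
   back to "x86-64" when the chosen processor lacks 64-bit support.  */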
3914 for (i = 0; i < pta_size; i++)
3915 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3917 ix86_schedule = processor_alias_table[i].schedule;
3918 ix86_tune = processor_alias_table[i].processor;
3919 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3921 if (!(processor_alias_table[i].flags & PTA_64BIT))
3923 if (ix86_tune_defaulted)
3925 opts->x_ix86_tune_string = "x86-64";
3926 for (i = 0; i < pta_size; i++)
3927 if (! strcmp (opts->x_ix86_tune_string,
3928 processor_alias_table[i].name))
3929 break;
3930 ix86_schedule = processor_alias_table[i].schedule;
3931 ix86_tune = processor_alias_table[i].processor;
3933 else
3934 error ("CPU you selected does not support x86-64 "
3935 "instruction set");
3938 /* Intel CPUs have always interpreted SSE prefetch instructions as
3939 NOPs; so, we can enable SSE prefetch instructions even when
3940 -mtune (rather than -march) points us to a processor that has them.
3941 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3942 higher processors. */
3943 if (TARGET_CMOV
3944 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3945 x86_prefetch_sse = true;
3946 break;
3949 if (ix86_tune_specified && i == pta_size)
3950 error ("bad value (%s) for %stune=%s %s",
3951 opts->x_ix86_tune_string, prefix, suffix, sw);
3953 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3955 #ifndef USE_IX86_FRAME_POINTER
3956 #define USE_IX86_FRAME_POINTER 0
3957 #endif
3959 #ifndef USE_X86_64_FRAME_POINTER
3960 #define USE_X86_64_FRAME_POINTER 0
3961 #endif
3963 /* Set the default values for switches whose default depends on TARGET_64BIT
3964 in case they weren't overwritten by command line options. */
3965 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3967 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3968 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3969 if (opts->x_flag_asynchronous_unwind_tables
3970 && !opts_set->x_flag_unwind_tables
3971 && TARGET_64BIT_MS_ABI)
3972 opts->x_flag_unwind_tables = 1;
3973 if (opts->x_flag_asynchronous_unwind_tables == 2)
3974 opts->x_flag_unwind_tables
3975 = opts->x_flag_asynchronous_unwind_tables = 1;
3976 if (opts->x_flag_pcc_struct_return == 2)
3977 opts->x_flag_pcc_struct_return = 0;
3979 else
3981 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3982 opts->x_flag_omit_frame_pointer
3983 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3984 if (opts->x_flag_asynchronous_unwind_tables == 2)
3985 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3986 if (opts->x_flag_pcc_struct_return == 2)
3988 /* Intel MCU psABI specifies that -freg-struct-return should
3989 be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1,
3990 we check -miamcu so that -freg-struct-return is always
3991 turned on if -miamcu is used. */
3992 if (TARGET_IAMCU_P (opts->x_target_flags))
3993 opts->x_flag_pcc_struct_return = 0;
3994 else
3995 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3999 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4000 /* TODO: ix86_cost should be chosen at instruction or function granularity,
4001 so for cold code we use size_cost even in !optimize_size compilation.  */
4002 if (opts->x_optimize_size)
4003 ix86_cost = &ix86_size_cost;
4004 else
4005 ix86_cost = ix86_tune_cost;
4007 /* Arrange to set up i386_stack_locals for all functions. */
4008 init_machine_status = ix86_init_machine_status;
4010 /* Validate -mregparm= value. */
4011 if (opts_set->x_ix86_regparm)
4013 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4014 warning (0, "-mregparm is ignored in 64-bit mode");
4015 else if (TARGET_IAMCU_P (opts->x_target_flags))
4016 warning (0, "-mregparm is ignored for Intel MCU psABI");
4017 if (opts->x_ix86_regparm > REGPARM_MAX)
4019 error ("-mregparm=%d is not between 0 and %d",
4020 opts->x_ix86_regparm, REGPARM_MAX);
4021 opts->x_ix86_regparm = 0;
4024 if (TARGET_IAMCU_P (opts->x_target_flags)
4025 || TARGET_64BIT_P (opts->x_ix86_isa_flags))
4026 opts->x_ix86_regparm = REGPARM_MAX;
4028 /* Default align_* from the processor table. */
4029 ix86_default_align (opts);
4031 /* Provide default for -mbranch-cost= value. */
4032 if (!opts_set->x_ix86_branch_cost)
4033 opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
4035 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4037 opts->x_target_flags
4038 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
4040 /* Enable by default the SSE and MMX builtins. Do allow the user to
4041 explicitly disable any of these. In particular, disabling SSE and
4042 MMX for kernel code is extremely useful. */
4043 if (!ix86_arch_specified)
4044 opts->x_ix86_isa_flags
4045 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
4046 | TARGET_SUBTARGET64_ISA_DEFAULT)
4047 & ~opts->x_ix86_isa_flags_explicit);
4049 if (TARGET_RTD_P (opts->x_target_flags))
4050 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
4052 else
4054 opts->x_target_flags
4055 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
4057 if (!ix86_arch_specified)
4058 opts->x_ix86_isa_flags
4059 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
4061 /* The i386 ABI does not specify a red zone.  It still makes sense to use one
4062 when the programmer takes care to keep the stack from being destroyed.  */
4063 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
4064 opts->x_target_flags |= MASK_NO_RED_ZONE;
4067 /* Keep nonleaf frame pointers. */
4068 if (opts->x_flag_omit_frame_pointer)
4069 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
4070 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
4071 opts->x_flag_omit_frame_pointer = 1;
4073 /* If we're doing fast math, we don't care about comparison order
4074 wrt NaNs. This lets us use a shorter comparison sequence. */
4075 if (opts->x_flag_finite_math_only)
4076 opts->x_target_flags &= ~MASK_IEEE_FP;
4078 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
4079 since the insns won't need emulation. */
4080 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
4081 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
4083 /* Likewise, if the target doesn't have a 387, or we've specified
4084 software floating point, don't use 387 inline intrinsics. */
4085 if (!TARGET_80387_P (opts->x_target_flags))
4086 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
4088 /* Turn on MMX builtins for -msse. */
4089 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4090 opts->x_ix86_isa_flags
4091 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4093 /* Enable SSE prefetch. */
4094 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4095 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4096 x86_prefetch_sse = true;
4098 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
4099 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4100 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4101 opts->x_ix86_isa_flags
4102 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4104 /* Enable popcnt instruction for -msse4.2 or -mabm. */
4105 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4106 || TARGET_ABM_P (opts->x_ix86_isa_flags))
4107 opts->x_ix86_isa_flags
4108 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4110 /* Enable lzcnt instruction for -mabm. */
4111 if (TARGET_ABM_P (opts->x_ix86_isa_flags))
4112 opts->x_ix86_isa_flags
4113 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4115 /* Validate -mpreferred-stack-boundary= value or default it to
4116 PREFERRED_STACK_BOUNDARY_DEFAULT. */
4117 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4118 if (opts_set->x_ix86_preferred_stack_boundary_arg)
4120 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4121 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4122 int max = (TARGET_SEH ? 4 : 12);
4124 if (opts->x_ix86_preferred_stack_boundary_arg < min
4125 || opts->x_ix86_preferred_stack_boundary_arg > max)
4127 if (min == max)
4128 error ("-mpreferred-stack-boundary is not supported "
4129 "for this target");
4130 else
4131 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4132 opts->x_ix86_preferred_stack_boundary_arg, min, max);
4134 else
4135 ix86_preferred_stack_boundary
4136 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4139 /* Set the default value for -mstackrealign. */
4140 if (opts->x_ix86_force_align_arg_pointer == -1)
4141 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4143 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4145 /* Validate -mincoming-stack-boundary= value or default it to
4146 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
4147 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4148 if (opts_set->x_ix86_incoming_stack_boundary_arg)
4150 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4151 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4153 if (opts->x_ix86_incoming_stack_boundary_arg < min
4154 || opts->x_ix86_incoming_stack_boundary_arg > 12)
4155 error ("-mincoming-stack-boundary=%d is not between %d and 12",
4156 opts->x_ix86_incoming_stack_boundary_arg, min);
4157 else
4159 ix86_user_incoming_stack_boundary
4160 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4161 ix86_incoming_stack_boundary
4162 = ix86_user_incoming_stack_boundary;
4166 #ifndef NO_PROFILE_COUNTERS
4167 if (flag_nop_mcount)
4168 error ("-mnop-mcount is not compatible with this target");
4169 #endif
4170 if (flag_nop_mcount && flag_pic)
4171 error ("-mnop-mcount is not implemented for -fPIC");
4173 /* Accept -msseregparm only if at least SSE support is enabled. */
4174 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4175 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4176 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4178 if (opts_set->x_ix86_fpmath)
4180 if (opts->x_ix86_fpmath & FPMATH_SSE)
4182 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4184 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4185 opts->x_ix86_fpmath = FPMATH_387;
4187 else if ((opts->x_ix86_fpmath & FPMATH_387)
4188 && !TARGET_80387_P (opts->x_target_flags))
4190 warning (0, "387 instruction set disabled, using SSE arithmetics");
4191 opts->x_ix86_fpmath = FPMATH_SSE;
4195 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4196 -mfpmath=387.  The latter is however the default on many targets, since the
4197 extra 80-bit precision of temporaries is considered part of the ABI.
4198 Overwrite the default at least for -ffast-math.
4199 TODO: -mfpmath=both seems to produce similarly performing code with slightly
4200 smaller binaries.  It is however not clear whether register allocation is
4201 ready for this setting.
4202 Also, -mfpmath=387 is overall considerably more compact (about 4-5%) than SSE
4203 codegen.  We may switch to 387 with -ffast-math for size-optimized
4204 functions.  */
4205 else if (fast_math_flags_set_p (&global_options)
4206 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4207 opts->x_ix86_fpmath = FPMATH_SSE;
4208 else
4209 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4211 /* If the i387 is disabled, then do not return values in it. */
4212 if (!TARGET_80387_P (opts->x_target_flags))
4213 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4215 /* Use external vectorized library in vectorizing intrinsics. */
4216 if (opts_set->x_ix86_veclibabi_type)
4217 switch (opts->x_ix86_veclibabi_type)
4219 case ix86_veclibabi_type_svml:
4220 ix86_veclib_handler = ix86_veclibabi_svml;
4221 break;
4223 case ix86_veclibabi_type_acml:
4224 ix86_veclib_handler = ix86_veclibabi_acml;
4225 break;
4227 default:
4228 gcc_unreachable ();
4231 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4232 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4233 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4235 /* If stack probes are required, the space used for large function
4236 arguments on the stack must also be probed, so enable
4237 -maccumulate-outgoing-args so this happens in the prologue. */
4238 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4239 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4241 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4242 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4243 "for correctness", prefix, suffix);
4244 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4247 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4249 char *p;
4250 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4251 p = strchr (internal_label_prefix, 'X');
4252 internal_label_prefix_len = p - internal_label_prefix;
4253 *p = '\0';
4256 /* When scheduling description is not available, disable scheduler pass
4257 so it won't slow down the compilation and make x87 code slower. */
4258 if (!TARGET_SCHEDULE)
4259 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4261 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4262 ix86_tune_cost->simultaneous_prefetches,
4263 opts->x_param_values,
4264 opts_set->x_param_values);
4265 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4266 ix86_tune_cost->prefetch_block,
4267 opts->x_param_values,
4268 opts_set->x_param_values);
4269 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4270 ix86_tune_cost->l1_cache_size,
4271 opts->x_param_values,
4272 opts_set->x_param_values);
4273 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4274 ix86_tune_cost->l2_cache_size,
4275 opts->x_param_values,
4276 opts_set->x_param_values);
4278 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4279 if (opts->x_flag_prefetch_loop_arrays < 0
4280 && HAVE_prefetch
4281 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4282 && !opts->x_optimize_size
4283 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4284 opts->x_flag_prefetch_loop_arrays = 1;
4286 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4287 can be optimized to ap = __builtin_next_arg (0).  */
4288 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4289 targetm.expand_builtin_va_start = NULL;
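/* Pick the word-size specific RTL generators (leave, TLS, stack
   probing, monitor, ...) used elsewhere in the back end.  */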
4291 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4293 ix86_gen_leave = gen_leave_rex64;
4294 if (Pmode == DImode)
4296 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4297 ix86_gen_tls_local_dynamic_base_64
4298 = gen_tls_local_dynamic_base_64_di;
4300 else
4302 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4303 ix86_gen_tls_local_dynamic_base_64
4304 = gen_tls_local_dynamic_base_64_si;
4307 else
4308 ix86_gen_leave = gen_leave;
4310 if (Pmode == DImode)
4312 ix86_gen_add3 = gen_adddi3;
4313 ix86_gen_sub3 = gen_subdi3;
4314 ix86_gen_sub3_carry = gen_subdi3_carry;
4315 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4316 ix86_gen_andsp = gen_anddi3;
4317 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4318 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4319 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4320 ix86_gen_monitor = gen_sse3_monitor_di;
4321 ix86_gen_monitorx = gen_monitorx_di;
4323 else
4325 ix86_gen_add3 = gen_addsi3;
4326 ix86_gen_sub3 = gen_subsi3;
4327 ix86_gen_sub3_carry = gen_subsi3_carry;
4328 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4329 ix86_gen_andsp = gen_andsi3;
4330 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4331 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4332 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4333 ix86_gen_monitor = gen_sse3_monitor_si;
4334 ix86_gen_monitorx = gen_monitorx_si;
4337 #ifdef USE_IX86_CLD
4338 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4339 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4340 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4341 #endif
4343 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4345 if (opts->x_flag_fentry > 0)
4346 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4347 "with -fpic");
4348 opts->x_flag_fentry = 0;
4350 else if (TARGET_SEH)
4352 if (opts->x_flag_fentry == 0)
4353 sorry ("-mno-fentry isn%'t compatible with SEH");
4354 opts->x_flag_fentry = 1;
4356 else if (opts->x_flag_fentry < 0)
4358 #if defined(PROFILE_BEFORE_PROLOGUE)
4359 opts->x_flag_fentry = 1;
4360 #else
4361 opts->x_flag_fentry = 0;
4362 #endif
4365 if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4366 opts->x_target_flags |= MASK_VZEROUPPER;
4367 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4368 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4369 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4370 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4371 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4372 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4373 /* Enable 128-bit AVX instruction generation
4374 for the auto-vectorizer. */
4375 if (TARGET_AVX128_OPTIMAL
4376 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4377 opts->x_target_flags |= MASK_PREFER_AVX128;
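/* Parse the comma-separated -mrecip=opt[,opt,...] list; a leading
   '!' inverts the named option, and "default" stands for RECIP_MASK_ALL.  */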
4379 if (opts->x_ix86_recip_name)
4381 char *p = ASTRDUP (opts->x_ix86_recip_name);
4382 char *q;
4383 unsigned int mask, i;
4384 bool invert;
4386 while ((q = strtok (p, ",")) != NULL)
4388 p = NULL;
4389 if (*q == '!')
4391 invert = true;
4392 q++;
4394 else
4395 invert = false;
4397 if (!strcmp (q, "default"))
4398 mask = RECIP_MASK_ALL;
4399 else
4401 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4402 if (!strcmp (q, recip_options[i].string))
4404 mask = recip_options[i].mask;
4405 break;
4408 if (i == ARRAY_SIZE (recip_options))
4410 error ("unknown option for -mrecip=%s", q);
4411 invert = false;
4412 mask = RECIP_MASK_NONE;
4416 opts->x_recip_mask_explicit |= mask;
4417 if (invert)
4418 opts->x_recip_mask &= ~mask;
4419 else
4420 opts->x_recip_mask |= mask;
4424 if (TARGET_RECIP_P (opts->x_target_flags))
4425 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4426 else if (opts_set->x_target_flags & MASK_RECIP)
4427 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4429 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4430 for 64-bit Bionic. Also default long double to 64-bit for Intel
4431 MCU psABI. */
4432 if ((TARGET_HAS_BIONIC || TARGET_IAMCU)
4433 && !(opts_set->x_target_flags
4434 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4435 opts->x_target_flags |= (TARGET_64BIT
4436 ? MASK_LONG_DOUBLE_128
4437 : MASK_LONG_DOUBLE_64);
4439 /* Only one of them can be active. */
4440 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4441 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4443 /* Save the initial options in case the user does function specific
4444 options. */
4445 if (main_args_p)
4446 target_option_default_node = target_option_current_node
4447 = build_target_option_node (opts);
4449 /* Handle stack protector */
4450 if (!opts_set->x_ix86_stack_protector_guard)
4451 opts->x_ix86_stack_protector_guard
4452 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4454 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4455 if (opts->x_ix86_tune_memcpy_strategy)
4457 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4458 ix86_parse_stringop_strategy_string (str, false);
4459 free (str);
4462 if (opts->x_ix86_tune_memset_strategy)
4464 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4465 ix86_parse_stringop_strategy_string (str, true);
4466 free (str);
4470 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4472 static void
4473 ix86_option_override (void)
4475 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4476 struct register_pass_info insert_vzeroupper_info
4477 = { pass_insert_vzeroupper, "reload",
4478 1, PASS_POS_INSERT_AFTER
4481 ix86_option_override_internal (true, &global_options, &global_options_set);
4484 /* This needs to be done at start up. It's convenient to do it here. */
4485 register_pass (&insert_vzeroupper_info);
4488 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
4489 static char *
4490 ix86_offload_options (void)
4492 if (TARGET_LP64)
4493 return xstrdup ("-foffload-abi=lp64");
4494 return xstrdup ("-foffload-abi=ilp32");
4497 /* Update register usage after having seen the compiler flags. */
4499 static void
4500 ix86_conditional_register_usage (void)
4502 int i, c_mask;
4504 /* For 32-bit targets, squash the REX registers. */
4505 if (! TARGET_64BIT)
4507 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4508 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4509 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4510 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4511 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4512 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4515 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4516 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4517 : TARGET_64BIT ? (1 << 2)
4518 : (1 << 1));
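/* Editorial note: entries greater than 1 in the CALL_USED_REGISTERS
   initializer encode per-ABI bits rather than a plain boolean; the mask
   chosen above (bit 3 for the 64-bit MS ABI, bit 2 for 64-bit SysV,
   bit 1 for 32-bit) selects which of those bits decides whether the
   register is treated as call-used in the loop below.  */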
4520 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4522 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4524 /* Set/reset conditionally defined registers from
4525 CALL_USED_REGISTERS initializer. */
4526 if (call_used_regs[i] > 1)
4527 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4529 /* Calculate registers of CLOBBERED_REGS register set
4530 as call used registers from GENERAL_REGS register set. */
4531 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4532 && call_used_regs[i])
4533 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4536 /* If MMX is disabled, squash the registers. */
4537 if (! TARGET_MMX)
4538 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4539 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4540 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4542 /* If SSE is disabled, squash the registers. */
4543 if (! TARGET_SSE)
4544 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4545 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4546 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4548 /* If the FPU is disabled, squash the registers. */
4549 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4550 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4551 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4552 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4554 /* If AVX512F is disabled, squash the registers. */
4555 if (! TARGET_AVX512F)
4557 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4558 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4560 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4561 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4564 /* If MPX is disabled, squash the registers. */
4565 if (! TARGET_MPX)
4566 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4567 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4571 /* Save the current options */
4573 static void
4574 ix86_function_specific_save (struct cl_target_option *ptr,
4575 struct gcc_options *opts)
4577 ptr->arch = ix86_arch;
4578 ptr->schedule = ix86_schedule;
4579 ptr->prefetch_sse = x86_prefetch_sse;
4580 ptr->tune = ix86_tune;
4581 ptr->branch_cost = ix86_branch_cost;
4582 ptr->tune_defaulted = ix86_tune_defaulted;
4583 ptr->arch_specified = ix86_arch_specified;
4584 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4585 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4586 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4587 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4588 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4589 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4590 ptr->x_ix86_abi = opts->x_ix86_abi;
4591 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4592 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4593 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4594 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4595 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4596 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4597 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4598 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4599 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4600 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4601 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4602 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4603 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4604 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4605 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4606 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4607 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4608 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4609 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4610 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4612 /* The fields are char but the variables are not; make sure the
4613 values fit in the fields. */
4614 gcc_assert (ptr->arch == ix86_arch);
4615 gcc_assert (ptr->schedule == ix86_schedule);
4616 gcc_assert (ptr->tune == ix86_tune);
4617 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4620 /* Restore the current options */
4622 static void
4623 ix86_function_specific_restore (struct gcc_options *opts,
4624 struct cl_target_option *ptr)
4626 enum processor_type old_tune = ix86_tune;
4627 enum processor_type old_arch = ix86_arch;
4628 unsigned int ix86_arch_mask;
4629 int i;
4631 /* We don't change -fPIC. */
4632 opts->x_flag_pic = flag_pic;
4634 ix86_arch = (enum processor_type) ptr->arch;
4635 ix86_schedule = (enum attr_cpu) ptr->schedule;
4636 ix86_tune = (enum processor_type) ptr->tune;
4637 x86_prefetch_sse = ptr->prefetch_sse;
4638 opts->x_ix86_branch_cost = ptr->branch_cost;
4639 ix86_tune_defaulted = ptr->tune_defaulted;
4640 ix86_arch_specified = ptr->arch_specified;
4641 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4642 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4643 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4644 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4645 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4646 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4647 opts->x_ix86_abi = ptr->x_ix86_abi;
4648 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4649 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4650 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4651 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4652 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4653 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4654 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4655 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4656 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4657 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4658 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4659 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4660 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4661 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4662 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4663 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4664 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4665 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4666 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4667 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4668 ix86_tune_cost = processor_target_table[ix86_tune].cost;
4669 /* TODO: ix86_cost should be chosen at instruction or function granularity
4670 so for cold code we use size_cost even in !optimize_size compilation. */
4671 if (opts->x_optimize_size)
4672 ix86_cost = &ix86_size_cost;
4673 else
4674 ix86_cost = ix86_tune_cost;
4676 /* Recreate the arch feature tests if the arch changed */
4677 if (old_arch != ix86_arch)
4679 ix86_arch_mask = 1u << ix86_arch;
4680 for (i = 0; i < X86_ARCH_LAST; ++i)
4681 ix86_arch_features[i]
4682 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4685 /* Recreate the tune optimization tests */
4686 if (old_tune != ix86_tune)
4687 set_ix86_tune_features (ix86_tune, false);
4690 /* Adjust target options after streaming them in. This is mainly about
4691 reconciling them with global options. */
4693 static void
4694 ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4696 /* flag_pic is a global option, but ix86_cmodel is a target-saved option
4697 partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel
4698 for PIC, or error out. */
4699 if (flag_pic)
4700 switch (ptr->x_ix86_cmodel)
4702 case CM_SMALL:
4703 ptr->x_ix86_cmodel = CM_SMALL_PIC;
4704 break;
4706 case CM_MEDIUM:
4707 ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4708 break;
4710 case CM_LARGE:
4711 ptr->x_ix86_cmodel = CM_LARGE_PIC;
4712 break;
4714 case CM_KERNEL:
4715 error ("code model %s does not support PIC mode", "kernel");
4716 break;
4718 default:
4719 break;
4721 else
4722 switch (ptr->x_ix86_cmodel)
4724 case CM_SMALL_PIC:
4725 ptr->x_ix86_cmodel = CM_SMALL;
4726 break;
4728 case CM_MEDIUM_PIC:
4729 ptr->x_ix86_cmodel = CM_MEDIUM;
4730 break;
4732 case CM_LARGE_PIC:
4733 ptr->x_ix86_cmodel = CM_LARGE;
4734 break;
4736 default:
4737 break;
4741 /* Print the current options */
4743 static void
4744 ix86_function_specific_print (FILE *file, int indent,
4745 struct cl_target_option *ptr)
4747 char *target_string
4748 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4749 NULL, NULL, ptr->x_ix86_fpmath, false);
4751 gcc_assert (ptr->arch < PROCESSOR_max);
4752 fprintf (file, "%*sarch = %d (%s)\n",
4753 indent, "",
4754 ptr->arch, processor_target_table[ptr->arch].name);
4756 gcc_assert (ptr->tune < PROCESSOR_max);
4757 fprintf (file, "%*stune = %d (%s)\n",
4758 indent, "",
4759 ptr->tune, processor_target_table[ptr->tune].name);
4761 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4763 if (target_string)
4765 fprintf (file, "%*s%s\n", indent, "", target_string);
4766 free (target_string);
4771 /* Inner function to process the attribute((target(...))); it takes an argument
4772 and sets the current options from the argument. If we have a list, recursively go
4773 over the list. */
4775 static bool
4776 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4777 struct gcc_options *opts,
4778 struct gcc_options *opts_set,
4779 struct gcc_options *enum_opts_set)
4781 char *next_optstr;
4782 bool ret = true;
4784 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4785 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4786 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4787 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4788 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
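/* Editorial illustration: IX86_ATTR_ISA ("avx2", OPT_mavx2) expands to
   { "avx2", 4, ix86_opt_isa, OPT_mavx2, 0 } -- the option name, its length
   (sizeof minus the terminating NUL), the handler type, the command-line
   option enum value and an unused mask of zero.  */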
4790 enum ix86_opt_type
4792 ix86_opt_unknown,
4793 ix86_opt_yes,
4794 ix86_opt_no,
4795 ix86_opt_str,
4796 ix86_opt_enum,
4797 ix86_opt_isa
4800 static const struct
4802 const char *string;
4803 size_t len;
4804 enum ix86_opt_type type;
4805 int opt;
4806 int mask;
4807 } attrs[] = {
4808 /* isa options */
4809 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4810 IX86_ATTR_ISA ("abm", OPT_mabm),
4811 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4812 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4813 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4814 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4815 IX86_ATTR_ISA ("aes", OPT_maes),
4816 IX86_ATTR_ISA ("sha", OPT_msha),
4817 IX86_ATTR_ISA ("avx", OPT_mavx),
4818 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4819 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4820 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4821 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4822 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4823 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4824 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4825 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4826 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4827 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4828 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4829 IX86_ATTR_ISA ("sse", OPT_msse),
4830 IX86_ATTR_ISA ("sse2", OPT_msse2),
4831 IX86_ATTR_ISA ("sse3", OPT_msse3),
4832 IX86_ATTR_ISA ("sse4", OPT_msse4),
4833 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4834 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4835 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4836 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4837 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4838 IX86_ATTR_ISA ("fma", OPT_mfma),
4839 IX86_ATTR_ISA ("xop", OPT_mxop),
4840 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4841 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4842 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4843 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4844 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4845 IX86_ATTR_ISA ("hle", OPT_mhle),
4846 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4847 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4848 IX86_ATTR_ISA ("adx", OPT_madx),
4849 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4850 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4851 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4852 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4853 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4854 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4855 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4856 IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
4857 IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
4858 IX86_ATTR_ISA ("clwb", OPT_mclwb),
4859 IX86_ATTR_ISA ("pcommit", OPT_mpcommit),
4860 IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx),
4862 /* enum options */
4863 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4865 /* string options */
4866 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4867 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4869 /* flag options */
4870 IX86_ATTR_YES ("cld",
4871 OPT_mcld,
4872 MASK_CLD),
4874 IX86_ATTR_NO ("fancy-math-387",
4875 OPT_mfancy_math_387,
4876 MASK_NO_FANCY_MATH_387),
4878 IX86_ATTR_YES ("ieee-fp",
4879 OPT_mieee_fp,
4880 MASK_IEEE_FP),
4882 IX86_ATTR_YES ("inline-all-stringops",
4883 OPT_minline_all_stringops,
4884 MASK_INLINE_ALL_STRINGOPS),
4886 IX86_ATTR_YES ("inline-stringops-dynamically",
4887 OPT_minline_stringops_dynamically,
4888 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4890 IX86_ATTR_NO ("align-stringops",
4891 OPT_mno_align_stringops,
4892 MASK_NO_ALIGN_STRINGOPS),
4894 IX86_ATTR_YES ("recip",
4895 OPT_mrecip,
4896 MASK_RECIP),
4900 /* If this is a list, recurse to get the options. */
4901 if (TREE_CODE (args) == TREE_LIST)
4903 bool ret = true;
4905 for (; args; args = TREE_CHAIN (args))
4906 if (TREE_VALUE (args)
4907 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4908 p_strings, opts, opts_set,
4909 enum_opts_set))
4910 ret = false;
4912 return ret;
4915 else if (TREE_CODE (args) != STRING_CST)
4917 error ("attribute %<target%> argument not a string");
4918 return false;
4921 /* Handle multiple arguments separated by commas. */
4922 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
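/* Editorial illustration (hypothetical usage): a declaration such as
   __attribute__((target ("avx2,no-fma,arch=haswell"))) reaches this point
   as the single string "avx2,no-fma,arch=haswell"; the loop below splits
   it at the commas, strips a leading "no-" so the option is cleared rather
   than set, and dispatches each piece through the attrs[] table above.  */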
4924 while (next_optstr && *next_optstr != '\0')
4926 char *p = next_optstr;
4927 char *orig_p = p;
4928 char *comma = strchr (next_optstr, ',');
4929 const char *opt_string;
4930 size_t len, opt_len;
4931 int opt;
4932 bool opt_set_p;
4933 char ch;
4934 unsigned i;
4935 enum ix86_opt_type type = ix86_opt_unknown;
4936 int mask = 0;
4938 if (comma)
4940 *comma = '\0';
4941 len = comma - next_optstr;
4942 next_optstr = comma + 1;
4944 else
4946 len = strlen (p);
4947 next_optstr = NULL;
4950 /* Recognize no-xxx. */
4951 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4953 opt_set_p = false;
4954 p += 3;
4955 len -= 3;
4957 else
4958 opt_set_p = true;
4960 /* Find the option. */
4961 ch = *p;
4962 opt = N_OPTS;
4963 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4965 type = attrs[i].type;
4966 opt_len = attrs[i].len;
4967 if (ch == attrs[i].string[0]
4968 && ((type != ix86_opt_str && type != ix86_opt_enum)
4969 ? len == opt_len
4970 : len > opt_len)
4971 && memcmp (p, attrs[i].string, opt_len) == 0)
4973 opt = attrs[i].opt;
4974 mask = attrs[i].mask;
4975 opt_string = attrs[i].string;
4976 break;
4980 /* Process the option. */
4981 if (opt == N_OPTS)
4983 error ("attribute(target(\"%s\")) is unknown", orig_p);
4984 ret = false;
4987 else if (type == ix86_opt_isa)
4989 struct cl_decoded_option decoded;
4991 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4992 ix86_handle_option (opts, opts_set,
4993 &decoded, input_location);
4996 else if (type == ix86_opt_yes || type == ix86_opt_no)
4998 if (type == ix86_opt_no)
4999 opt_set_p = !opt_set_p;
5001 if (opt_set_p)
5002 opts->x_target_flags |= mask;
5003 else
5004 opts->x_target_flags &= ~mask;
5007 else if (type == ix86_opt_str)
5009 if (p_strings[opt])
5011 error ("option(\"%s\") was already specified", opt_string);
5012 ret = false;
5014 else
5015 p_strings[opt] = xstrdup (p + opt_len);
5018 else if (type == ix86_opt_enum)
5020 bool arg_ok;
5021 int value;
5023 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
5024 if (arg_ok)
5025 set_option (opts, enum_opts_set, opt, value,
5026 p + opt_len, DK_UNSPECIFIED, input_location,
5027 global_dc);
5028 else
5030 error ("attribute(target(\"%s\")) is unknown", orig_p);
5031 ret = false;
5035 else
5036 gcc_unreachable ();
5039 return ret;
5042 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
5044 tree
5045 ix86_valid_target_attribute_tree (tree args,
5046 struct gcc_options *opts,
5047 struct gcc_options *opts_set)
5049 const char *orig_arch_string = opts->x_ix86_arch_string;
5050 const char *orig_tune_string = opts->x_ix86_tune_string;
5051 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
5052 int orig_tune_defaulted = ix86_tune_defaulted;
5053 int orig_arch_specified = ix86_arch_specified;
5054 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
5055 tree t = NULL_TREE;
5056 int i;
5057 struct cl_target_option *def
5058 = TREE_TARGET_OPTION (target_option_default_node);
5059 struct gcc_options enum_opts_set;
5061 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
5063 /* Process each of the options on the chain. */
5064 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
5065 opts_set, &enum_opts_set))
5066 return error_mark_node;
5068 /* If the changed options are different from the default, rerun
5069 ix86_option_override_internal, and then save the options away.
5070 The string options are attribute options, and will be undone
5071 when we copy the save structure. */
5072 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
5073 || opts->x_target_flags != def->x_target_flags
5074 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
5075 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
5076 || enum_opts_set.x_ix86_fpmath)
5078 /* If we are using the default tune= or arch=, undo the string assigned,
5079 and use the default. */
5080 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
5081 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
5082 else if (!orig_arch_specified)
5083 opts->x_ix86_arch_string = NULL;
5085 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5086 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
5087 else if (orig_tune_defaulted)
5088 opts->x_ix86_tune_string = NULL;
5090 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
5091 if (enum_opts_set.x_ix86_fpmath)
5092 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5093 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5094 && TARGET_SSE_P (opts->x_ix86_isa_flags))
5096 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5097 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5100 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
5101 ix86_option_override_internal (false, opts, opts_set);
5103 /* Add any builtin functions with the new isa if any. */
5104 ix86_add_new_builtins (opts->x_ix86_isa_flags);
5106 /* Save the current options unless we are validating options for
5107 #pragma. */
5108 t = build_target_option_node (opts);
5110 opts->x_ix86_arch_string = orig_arch_string;
5111 opts->x_ix86_tune_string = orig_tune_string;
5112 opts_set->x_ix86_fpmath = orig_fpmath_set;
5114 /* Free up memory allocated to hold the strings */
5115 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5116 free (option_strings[i]);
5119 return t;
5122 /* Hook to validate attribute((target("string"))). */
5124 static bool
5125 ix86_valid_target_attribute_p (tree fndecl,
5126 tree ARG_UNUSED (name),
5127 tree args,
5128 int ARG_UNUSED (flags))
5130 struct gcc_options func_options;
5131 tree new_target, new_optimize;
5132 bool ret = true;
5134 /* attribute((target("default"))) does nothing, beyond
5135 affecting multi-versioning. */
5136 if (TREE_VALUE (args)
5137 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5138 && TREE_CHAIN (args) == NULL_TREE
5139 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5140 return true;
5142 tree old_optimize = build_optimization_node (&global_options);
5144 /* Get the optimization options of the current function. */
5145 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5147 if (!func_optimize)
5148 func_optimize = old_optimize;
5150 /* Init func_options. */
5151 memset (&func_options, 0, sizeof (func_options));
5152 init_options_struct (&func_options, NULL);
5153 lang_hooks.init_options_struct (&func_options);
5155 cl_optimization_restore (&func_options,
5156 TREE_OPTIMIZATION (func_optimize));
5158 /* Initialize func_options to the default before its target options can
5159 be set. */
5160 cl_target_option_restore (&func_options,
5161 TREE_TARGET_OPTION (target_option_default_node));
5163 new_target = ix86_valid_target_attribute_tree (args, &func_options,
5164 &global_options_set);
5166 new_optimize = build_optimization_node (&func_options);
5168 if (new_target == error_mark_node)
5169 ret = false;
5171 else if (fndecl && new_target)
5173 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5175 if (old_optimize != new_optimize)
5176 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5179 return ret;
5183 /* Hook to determine if one function can safely inline another. */
5185 static bool
5186 ix86_can_inline_p (tree caller, tree callee)
5188 bool ret = false;
5189 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5190 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5192 /* If callee has no option attributes, then it is ok to inline. */
5193 if (!callee_tree)
5194 ret = true;
5196 /* If caller has no option attributes, but callee does then it is not ok to
5197 inline. */
5198 else if (!caller_tree)
5199 ret = false;
5201 else
5203 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5204 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5206 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
5207 can inline an SSE2 function but an SSE2 function can't inline an SSE4
5208 function. */
5209 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5210 != callee_opts->x_ix86_isa_flags)
5211 ret = false;
5213 /* See if we have the same non-isa options. */
5214 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5215 ret = false;
5217 /* See if arch, tune, etc. are the same. */
5218 else if (caller_opts->arch != callee_opts->arch)
5219 ret = false;
5221 else if (caller_opts->tune != callee_opts->tune)
5222 ret = false;
5224 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5225 ret = false;
5227 else if (caller_opts->branch_cost != callee_opts->branch_cost)
5228 ret = false;
5230 else
5231 ret = true;
5234 return ret;
5238 /* Remember the last target of ix86_set_current_function. */
5239 static GTY(()) tree ix86_previous_fndecl;
5241 /* Set targets globals to the default (or current #pragma GCC target
5242 if active). Invalidate ix86_previous_fndecl cache. */
5244 void
5245 ix86_reset_previous_fndecl (void)
5247 tree new_tree = target_option_current_node;
5248 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5249 if (TREE_TARGET_GLOBALS (new_tree))
5250 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5251 else if (new_tree == target_option_default_node)
5252 restore_target_globals (&default_target_globals);
5253 else
5254 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5255 ix86_previous_fndecl = NULL_TREE;
5258 /* Establish appropriate back-end context for processing the function
5259 FNDECL. The argument might be NULL to indicate processing at top
5260 level, outside of any function scope. */
5261 static void
5262 ix86_set_current_function (tree fndecl)
5264 /* Only change the context if the function changes. This hook is called
5265 several times in the course of compiling a function, and we don't want to
5266 slow things down too much or call target_reinit when it isn't safe. */
5267 if (fndecl == ix86_previous_fndecl)
5268 return;
5270 tree old_tree;
5271 if (ix86_previous_fndecl == NULL_TREE)
5272 old_tree = target_option_current_node;
5273 else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5274 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5275 else
5276 old_tree = target_option_default_node;
5278 if (fndecl == NULL_TREE)
5280 if (old_tree != target_option_current_node)
5281 ix86_reset_previous_fndecl ();
5282 return;
5285 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5286 if (new_tree == NULL_TREE)
5287 new_tree = target_option_default_node;
5289 if (old_tree != new_tree)
5291 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5292 if (TREE_TARGET_GLOBALS (new_tree))
5293 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5294 else if (new_tree == target_option_default_node)
5295 restore_target_globals (&default_target_globals);
5296 else
5297 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5299 ix86_previous_fndecl = fndecl;
5303 /* Return true if this goes in large data/bss. */
5305 static bool
5306 ix86_in_large_data_p (tree exp)
5308 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5309 return false;
5311 /* Functions are never large data. */
5312 if (TREE_CODE (exp) == FUNCTION_DECL)
5313 return false;
5315 /* Automatic variables are never large data. */
5316 if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5317 return false;
5319 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5321 const char *section = DECL_SECTION_NAME (exp);
5322 if (strcmp (section, ".ldata") == 0
5323 || strcmp (section, ".lbss") == 0)
5324 return true;
5325 return false;
5327 else
5329 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5331 /* If this is an incomplete type with size 0, then we can't put it
5332 in data because it might be too big when completed. Also,
5333 int_size_in_bytes returns -1 if size can vary or is larger than
5334 an integer, in which case it is also safer to assume that it goes in
5335 large data. */
5336 if (size <= 0 || size > ix86_section_threshold)
5337 return true;
5340 return false;
5343 /* Switch to the appropriate section for output of DECL.
5344 DECL is either a `VAR_DECL' node or a constant of some sort.
5345 RELOC indicates whether forming the initial value of DECL requires
5346 link-time relocations. */
5348 ATTRIBUTE_UNUSED static section *
5349 x86_64_elf_select_section (tree decl, int reloc,
5350 unsigned HOST_WIDE_INT align)
5352 if (ix86_in_large_data_p (decl))
5354 const char *sname = NULL;
5355 unsigned int flags = SECTION_WRITE;
5356 switch (categorize_decl_for_section (decl, reloc))
5358 case SECCAT_DATA:
5359 sname = ".ldata";
5360 break;
5361 case SECCAT_DATA_REL:
5362 sname = ".ldata.rel";
5363 break;
5364 case SECCAT_DATA_REL_LOCAL:
5365 sname = ".ldata.rel.local";
5366 break;
5367 case SECCAT_DATA_REL_RO:
5368 sname = ".ldata.rel.ro";
5369 break;
5370 case SECCAT_DATA_REL_RO_LOCAL:
5371 sname = ".ldata.rel.ro.local";
5372 break;
5373 case SECCAT_BSS:
5374 sname = ".lbss";
5375 flags |= SECTION_BSS;
5376 break;
5377 case SECCAT_RODATA:
5378 case SECCAT_RODATA_MERGE_STR:
5379 case SECCAT_RODATA_MERGE_STR_INIT:
5380 case SECCAT_RODATA_MERGE_CONST:
5381 sname = ".lrodata";
5382 flags = 0;
5383 break;
5384 case SECCAT_SRODATA:
5385 case SECCAT_SDATA:
5386 case SECCAT_SBSS:
5387 gcc_unreachable ();
5388 case SECCAT_TEXT:
5389 case SECCAT_TDATA:
5390 case SECCAT_TBSS:
5391 /* We don't split these for the medium model. Place them into
5392 default sections and hope for the best. */
5393 break;
5395 if (sname)
5397 /* We might get called with string constants, but get_named_section
5398 doesn't like them as they are not DECLs. Also, we need to set
5399 flags in that case. */
5400 if (!DECL_P (decl))
5401 return get_section (sname, flags, NULL);
5402 return get_named_section (decl, sname, reloc);
5405 return default_elf_select_section (decl, reloc, align);
5408 /* Select a set of attributes for section NAME based on the properties
5409 of DECL and whether or not RELOC indicates that DECL's initializer
5410 might contain runtime relocations. */
5412 static unsigned int ATTRIBUTE_UNUSED
5413 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5415 unsigned int flags = default_section_type_flags (decl, name, reloc);
5417 if (decl == NULL_TREE
5418 && (strcmp (name, ".ldata.rel.ro") == 0
5419 || strcmp (name, ".ldata.rel.ro.local") == 0))
5420 flags |= SECTION_RELRO;
5422 if (strcmp (name, ".lbss") == 0
5423 || strncmp (name, ".lbss.", 5) == 0
5424 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5425 flags |= SECTION_BSS;
5427 return flags;
5430 /* Build up a unique section name, expressed as a
5431 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5432 RELOC indicates whether the initial value of EXP requires
5433 link-time relocations. */
5435 static void ATTRIBUTE_UNUSED
5436 x86_64_elf_unique_section (tree decl, int reloc)
5438 if (ix86_in_large_data_p (decl))
5440 const char *prefix = NULL;
5441 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5442 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5444 switch (categorize_decl_for_section (decl, reloc))
5446 case SECCAT_DATA:
5447 case SECCAT_DATA_REL:
5448 case SECCAT_DATA_REL_LOCAL:
5449 case SECCAT_DATA_REL_RO:
5450 case SECCAT_DATA_REL_RO_LOCAL:
5451 prefix = one_only ? ".ld" : ".ldata";
5452 break;
5453 case SECCAT_BSS:
5454 prefix = one_only ? ".lb" : ".lbss";
5455 break;
5456 case SECCAT_RODATA:
5457 case SECCAT_RODATA_MERGE_STR:
5458 case SECCAT_RODATA_MERGE_STR_INIT:
5459 case SECCAT_RODATA_MERGE_CONST:
5460 prefix = one_only ? ".lr" : ".lrodata";
5461 break;
5462 case SECCAT_SRODATA:
5463 case SECCAT_SDATA:
5464 case SECCAT_SBSS:
5465 gcc_unreachable ();
5466 case SECCAT_TEXT:
5467 case SECCAT_TDATA:
5468 case SECCAT_TBSS:
5469 /* We don't split these for the medium model. Place them into
5470 default sections and hope for the best. */
5471 break;
5473 if (prefix)
5475 const char *name, *linkonce;
5476 char *string;
5478 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5479 name = targetm.strip_name_encoding (name);
5481 /* If we're using one_only, then there needs to be a .gnu.linkonce
5482 prefix to the section name. */
5483 linkonce = one_only ? ".gnu.linkonce" : "";
5485 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5487 set_decl_section_name (decl, string);
5488 return;
5491 default_unique_section (decl, reloc);
5494 #ifdef COMMON_ASM_OP
5495 /* This says how to output assembler code to declare an
5496 uninitialized external linkage data object.
5498 For medium model x86-64 we need to use .largecomm opcode for
5499 large objects. */
5500 void
5501 x86_elf_aligned_common (FILE *file,
5502 const char *name, unsigned HOST_WIDE_INT size,
5503 int align)
5505 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5506 && size > (unsigned int)ix86_section_threshold)
5507 fputs ("\t.largecomm\t", file);
5508 else
5509 fputs (COMMON_ASM_OP, file);
5510 assemble_name (file, name);
5511 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5512 size, align / BITS_PER_UNIT);
5514 #endif
5516 /* Utility function for targets to use in implementing
5517 ASM_OUTPUT_ALIGNED_BSS. */
5519 void
5520 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5521 unsigned HOST_WIDE_INT size, int align)
5523 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5524 && size > (unsigned int)ix86_section_threshold)
5525 switch_to_section (get_named_section (decl, ".lbss", 0));
5526 else
5527 switch_to_section (bss_section);
5528 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5529 #ifdef ASM_DECLARE_OBJECT_NAME
5530 last_assemble_variable_decl = decl;
5531 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5532 #else
5533 /* Standard thing is just output label for the object. */
5534 ASM_OUTPUT_LABEL (file, name);
5535 #endif /* ASM_DECLARE_OBJECT_NAME */
5536 ASM_OUTPUT_SKIP (file, size ? size : 1);
5539 /* Decide whether we must probe the stack before any space allocation
5540 on this target. It's essentially TARGET_STACK_PROBE except when
5541 -fstack-check causes the stack to be already probed differently. */
5543 bool
5544 ix86_target_stack_probe (void)
5546 /* Do not probe the stack twice if static stack checking is enabled. */
5547 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5548 return false;
5550 return TARGET_STACK_PROBE;
5553 /* Decide whether we can make a sibling call to a function. DECL is the
5554 declaration of the function being targeted by the call and EXP is the
5555 CALL_EXPR representing the call. */
5557 static bool
5558 ix86_function_ok_for_sibcall (tree decl, tree exp)
5560 tree type, decl_or_type;
5561 rtx a, b;
5563 /* If we are generating position-independent code, we cannot sibcall
5564 optimize direct calls to global functions, as the PLT requires
5565 %ebx be live. (Darwin does not have a PLT.) */
5566 if (!TARGET_MACHO
5567 && !TARGET_64BIT
5568 && flag_pic
5569 && flag_plt
5570 && decl && !targetm.binds_local_p (decl))
5571 return false;
5573 /* If we need to align the outgoing stack, then sibcalling would
5574 unalign the stack, which may break the called function. */
5575 if (ix86_minimum_incoming_stack_boundary (true)
5576 < PREFERRED_STACK_BOUNDARY)
5577 return false;
5579 if (decl)
5581 decl_or_type = decl;
5582 type = TREE_TYPE (decl);
5584 else
5586 /* We're looking at the CALL_EXPR, we need the type of the function. */
5587 type = CALL_EXPR_FN (exp); /* pointer expression */
5588 type = TREE_TYPE (type); /* pointer type */
5589 type = TREE_TYPE (type); /* function type */
5590 decl_or_type = type;
5593 /* Check that the return value locations are the same. Like
5594 if we are returning floats on the 80387 register stack, we cannot
5595 make a sibcall from a function that doesn't return a float to a
5596 function that does or, conversely, from a function that does return
5597 a float to a function that doesn't; the necessary stack adjustment
5598 would not be executed. This is also the place we notice
5599 differences in the return value ABI. Note that it is ok for one
5600 of the functions to have void return type as long as the return
5601 value of the other is passed in a register. */
5602 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5603 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5604 cfun->decl, false);
5605 if (STACK_REG_P (a) || STACK_REG_P (b))
5607 if (!rtx_equal_p (a, b))
5608 return false;
5610 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5612 else if (!rtx_equal_p (a, b))
5613 return false;
5615 if (TARGET_64BIT)
5617 /* The SYSV ABI has more call-clobbered registers;
5618 disallow sibcalls from MS to SYSV. */
5619 if (cfun->machine->call_abi == MS_ABI
5620 && ix86_function_type_abi (type) == SYSV_ABI)
5621 return false;
5623 else
5625 /* If this call is indirect, we'll need to be able to use a
5626 call-clobbered register for the address of the target function.
5627 Make sure that no such register is used for passing
5628 parameters. Note that DLLIMPORT functions are indirect. */
5629 if (!decl
5630 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5632 /* Check if regparm >= 3 since arg_reg_available is set to
5633 false if regparm == 0. If regparm is 1 or 2, there is
5634 always a call-clobbered register available.
5636 ??? The symbol indirect call doesn't need a call-clobbered
5637 register. But we don't know if this is a symbol indirect
5638 call or not here. */
5639 if (ix86_function_regparm (type, NULL) >= 3
5640 && !cfun->machine->arg_reg_available)
5641 return false;
5645 /* Otherwise okay. That also includes certain types of indirect calls. */
5646 return true;
5649 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5650 and "sseregparm" calling convention attributes;
5651 arguments as in struct attribute_spec.handler. */
5653 static tree
5654 ix86_handle_cconv_attribute (tree *node, tree name,
5655 tree args,
5656 int,
5657 bool *no_add_attrs)
5659 if (TREE_CODE (*node) != FUNCTION_TYPE
5660 && TREE_CODE (*node) != METHOD_TYPE
5661 && TREE_CODE (*node) != FIELD_DECL
5662 && TREE_CODE (*node) != TYPE_DECL)
5664 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5665 name);
5666 *no_add_attrs = true;
5667 return NULL_TREE;
5670 /* Can combine regparm with all attributes but fastcall and thiscall. */
5671 if (is_attribute_p ("regparm", name))
5673 tree cst;
5675 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5677 error ("fastcall and regparm attributes are not compatible");
5680 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5682 error ("regparam and thiscall attributes are not compatible");
5685 cst = TREE_VALUE (args);
5686 if (TREE_CODE (cst) != INTEGER_CST)
5688 warning (OPT_Wattributes,
5689 "%qE attribute requires an integer constant argument",
5690 name);
5691 *no_add_attrs = true;
5693 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5695 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5696 name, REGPARM_MAX);
5697 *no_add_attrs = true;
5700 return NULL_TREE;
5703 if (TARGET_64BIT)
5705 /* Do not warn when emulating the MS ABI. */
5706 if ((TREE_CODE (*node) != FUNCTION_TYPE
5707 && TREE_CODE (*node) != METHOD_TYPE)
5708 || ix86_function_type_abi (*node) != MS_ABI)
5709 warning (OPT_Wattributes, "%qE attribute ignored",
5710 name);
5711 *no_add_attrs = true;
5712 return NULL_TREE;
5715 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5716 if (is_attribute_p ("fastcall", name))
5718 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5720 error ("fastcall and cdecl attributes are not compatible");
5722 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5724 error ("fastcall and stdcall attributes are not compatible");
5726 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5728 error ("fastcall and regparm attributes are not compatible");
5730 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5732 error ("fastcall and thiscall attributes are not compatible");
5736 /* Can combine stdcall with fastcall (redundant), regparm and
5737 sseregparm. */
5738 else if (is_attribute_p ("stdcall", name))
5740 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5742 error ("stdcall and cdecl attributes are not compatible");
5744 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5746 error ("stdcall and fastcall attributes are not compatible");
5748 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5750 error ("stdcall and thiscall attributes are not compatible");
5754 /* Can combine cdecl with regparm and sseregparm. */
5755 else if (is_attribute_p ("cdecl", name))
5757 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5759 error ("stdcall and cdecl attributes are not compatible");
5761 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5763 error ("fastcall and cdecl attributes are not compatible");
5765 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5767 error ("cdecl and thiscall attributes are not compatible");
5770 else if (is_attribute_p ("thiscall", name))
5772 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5773 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5774 name);
5775 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5777 error ("stdcall and thiscall attributes are not compatible");
5779 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5781 error ("fastcall and thiscall attributes are not compatible");
5783 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5785 error ("cdecl and thiscall attributes are not compatible");
5789 /* Can combine sseregparm with all attributes. */
5791 return NULL_TREE;
5794 /* The transactional memory builtins are implicitly regparm or fastcall
5795 depending on the ABI. Override the generic do-nothing attribute that
5796 these builtins were declared with, and replace it with one of the two
5797 attributes that we expect elsewhere. */
5799 static tree
5800 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5801 int flags, bool *no_add_attrs)
5803 tree alt;
5805 /* In no case do we want to add the placeholder attribute. */
5806 *no_add_attrs = true;
5808 /* The 64-bit ABI is unchanged for transactional memory. */
5809 if (TARGET_64BIT)
5810 return NULL_TREE;
5812 /* ??? Is there a better way to validate 32-bit Windows? We have
5813 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5814 if (CHECK_STACK_LIMIT > 0)
5815 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5816 else
5818 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5819 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5821 decl_attributes (node, alt, flags);
5823 return NULL_TREE;
5826 /* This function determines from TYPE the calling-convention. */
5828 unsigned int
5829 ix86_get_callcvt (const_tree type)
5831 unsigned int ret = 0;
5832 bool is_stdarg;
5833 tree attrs;
5835 if (TARGET_64BIT)
5836 return IX86_CALLCVT_CDECL;
5838 attrs = TYPE_ATTRIBUTES (type);
5839 if (attrs != NULL_TREE)
5841 if (lookup_attribute ("cdecl", attrs))
5842 ret |= IX86_CALLCVT_CDECL;
5843 else if (lookup_attribute ("stdcall", attrs))
5844 ret |= IX86_CALLCVT_STDCALL;
5845 else if (lookup_attribute ("fastcall", attrs))
5846 ret |= IX86_CALLCVT_FASTCALL;
5847 else if (lookup_attribute ("thiscall", attrs))
5848 ret |= IX86_CALLCVT_THISCALL;
5850 /* Regparm isn't allowed for thiscall and fastcall. */
5851 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5853 if (lookup_attribute ("regparm", attrs))
5854 ret |= IX86_CALLCVT_REGPARM;
5855 if (lookup_attribute ("sseregparm", attrs))
5856 ret |= IX86_CALLCVT_SSEREGPARM;
5859 if (IX86_BASE_CALLCVT(ret) != 0)
5860 return ret;
5863 is_stdarg = stdarg_p (type);
5864 if (TARGET_RTD && !is_stdarg)
5865 return IX86_CALLCVT_STDCALL | ret;
5867 if (ret != 0
5868 || is_stdarg
5869 || TREE_CODE (type) != METHOD_TYPE
5870 || ix86_function_type_abi (type) != MS_ABI)
5871 return IX86_CALLCVT_CDECL | ret;
5873 return IX86_CALLCVT_THISCALL;
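/* Editorial note: the fall-through above means that, on 32-bit targets, a
   METHOD_TYPE using the MS ABI with no explicit convention attribute and no
   stdarg arguments defaults to thiscall; every other unannotated type ends
   up as cdecl (or stdcall under -mrtd for non-stdarg functions).  */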
5876 /* Return 0 if the attributes for two types are incompatible, 1 if they
5877 are compatible, and 2 if they are nearly compatible (which causes a
5878 warning to be generated). */
5880 static int
5881 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5883 unsigned int ccvt1, ccvt2;
5885 if (TREE_CODE (type1) != FUNCTION_TYPE
5886 && TREE_CODE (type1) != METHOD_TYPE)
5887 return 1;
5889 ccvt1 = ix86_get_callcvt (type1);
5890 ccvt2 = ix86_get_callcvt (type2);
5891 if (ccvt1 != ccvt2)
5892 return 0;
5893 if (ix86_function_regparm (type1, NULL)
5894 != ix86_function_regparm (type2, NULL))
5895 return 0;
5897 return 1;
5900 /* Return the regparm value for a function with the indicated TYPE and DECL.
5901 DECL may be NULL when calling function indirectly
5902 or considering a libcall. */
5904 static int
5905 ix86_function_regparm (const_tree type, const_tree decl)
5907 tree attr;
5908 int regparm;
5909 unsigned int ccvt;
5911 if (TARGET_64BIT)
5912 return (ix86_function_type_abi (type) == SYSV_ABI
5913 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5914 ccvt = ix86_get_callcvt (type);
5915 regparm = ix86_regparm;
5917 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5919 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5920 if (attr)
5922 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5923 return regparm;
5926 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5927 return 2;
5928 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5929 return 1;
5931 /* Use register calling convention for local functions when possible. */
5932 if (decl
5933 && TREE_CODE (decl) == FUNCTION_DECL)
5935 cgraph_node *target = cgraph_node::get (decl);
5936 if (target)
5937 target = target->function_symbol ();
5939 /* Caller and callee must agree on the calling convention, so
5940 checking just `optimize' here would mean that with
5941 __attribute__((optimize (...))) the caller could use the regparm convention
5942 and the callee not, or vice versa. Instead look at whether the callee
5943 is optimized or not. */
5944 if (target && opt_for_fn (target->decl, optimize)
5945 && !(profile_flag && !flag_fentry))
5947 cgraph_local_info *i = &target->local;
5948 if (i && i->local && i->can_change_signature)
5950 int local_regparm, globals = 0, regno;
5952 /* Make sure no regparm register is taken by a
5953 fixed register variable. */
5954 for (local_regparm = 0; local_regparm < REGPARM_MAX;
5955 local_regparm++)
5956 if (fixed_regs[local_regparm])
5957 break;
5959 /* We don't want to use regparm(3) for nested functions as
5960 these use a static chain pointer in the third argument. */
5961 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5962 local_regparm = 2;
5964 /* Save a register for the split stack. */
5965 if (local_regparm == 3 && flag_split_stack)
5966 local_regparm = 2;
5968 /* Each fixed register usage increases register pressure,
5969 so fewer registers should be used for argument passing.
5970 This functionality can be overridden by an explicit
5971 regparm value. */
5972 for (regno = AX_REG; regno <= DI_REG; regno++)
5973 if (fixed_regs[regno])
5974 globals++;
5976 local_regparm
5977 = globals < local_regparm ? local_regparm - globals : 0;
5979 if (local_regparm > regparm)
5980 regparm = local_regparm;
5985 return regparm;
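/* Editorial illustration: on 32-bit, a local (non-overridable) function
   compiled with optimization and no fixed argument registers ends up with
   the maximum regparm value found above, reduced to 2 when the function
   needs a static chain or when -fsplit-stack reserves a register, and
   further reduced by any globally fixed registers.  */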
5988 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5989 DFmode (2) arguments in SSE registers for a function with the
5990 indicated TYPE and DECL. DECL may be NULL when calling function
5991 indirectly or considering a libcall. Return -1 if any FP parameter
5992 should be rejected by error. This is used in situations where we imply the SSE
5993 calling convention but the function is called from another function with
5994 SSE disabled. Otherwise return 0. */
5996 static int
5997 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5999 gcc_assert (!TARGET_64BIT);
6001 /* Use SSE registers to pass SFmode and DFmode arguments if requested
6002 by the sseregparm attribute. */
6003 if (TARGET_SSEREGPARM
6004 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
6006 if (!TARGET_SSE)
6008 if (warn)
6010 if (decl)
6011 error ("calling %qD with attribute sseregparm without "
6012 "SSE/SSE2 enabled", decl);
6013 else
6014 error ("calling %qT with attribute sseregparm without "
6015 "SSE/SSE2 enabled", type);
6017 return 0;
6020 return 2;
6023 if (!decl)
6024 return 0;
6026 cgraph_node *target = cgraph_node::get (decl);
6027 if (target)
6028 target = target->function_symbol ();
6030 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
6031 (and DFmode for SSE2) arguments in SSE registers. */
6032 if (target
6033 /* TARGET_SSE_MATH */
6034 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
6035 && opt_for_fn (target->decl, optimize)
6036 && !(profile_flag && !flag_fentry))
6038 cgraph_local_info *i = &target->local;
6039 if (i && i->local && i->can_change_signature)
6041 /* Refuse to produce wrong code when a local function with SSE enabled
6042 is called from an SSE-disabled function.
6043 FIXME: We need a way to detect these cases across ltrans partitions
6044 and avoid using SSE calling conventions on local functions called
6045 from functions with SSE disabled. For now at least delay the
6046 warning until we know we are going to produce wrong code.
6047 See PR66047. */
6048 if (!TARGET_SSE && warn)
6049 return -1;
6050 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
6051 ->x_ix86_isa_flags) ? 2 : 1;
6055 return 0;
6058 /* Return true if EAX is live at the start of the function. Used by
6059 ix86_expand_prologue to determine if we need special help before
6060 calling allocate_stack_worker. */
6062 static bool
6063 ix86_eax_live_at_start_p (void)
6065 /* Cheat. Don't bother working forward from ix86_function_regparm
6066 to the function type to whether an actual argument is located in
6067 eax. Instead just look at cfg info, which is still close enough
6068 to correct at this point. This gives false positives for broken
6069 functions that might use uninitialized data that happens to be
6070 allocated in eax, but who cares? */
6071 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
6074 static bool
6075 ix86_keep_aggregate_return_pointer (tree fntype)
6077 tree attr;
6079 if (!TARGET_64BIT)
6081 attr = lookup_attribute ("callee_pop_aggregate_return",
6082 TYPE_ATTRIBUTES (fntype));
6083 if (attr)
6084 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6086 /* For 32-bit MS-ABI the default is to keep aggregate
6087 return pointer. */
6088 if (ix86_function_type_abi (fntype) == MS_ABI)
6089 return true;
6091 return KEEP_AGGREGATE_RETURN_POINTER != 0;
6094 /* Value is the number of bytes of arguments automatically
6095 popped when returning from a subroutine call.
6096 FUNDECL is the declaration node of the function (as a tree),
6097 FUNTYPE is the data type of the function (as a tree),
6098 or for a library call it is an identifier node for the subroutine name.
6099 SIZE is the number of bytes of arguments passed on the stack.
6101 On the 80386, the RTD insn may be used to pop them if the number
6102 of args is fixed, but if the number is variable then the caller
6103 must pop them all. RTD can't be used for library calls now
6104 because the library is compiled with the Unix compiler.
6105 Use of RTD is a selectable option, since it is incompatible with
6106 standard Unix calling sequences. If the option is not selected,
6107 the caller must always pop the args.
6109 The attribute stdcall is equivalent to RTD on a per module basis. */
6111 static int
6112 ix86_return_pops_args (tree fundecl, tree funtype, int size)
6114 unsigned int ccvt;
6116 /* None of the 64-bit ABIs pop arguments. */
6117 if (TARGET_64BIT)
6118 return 0;
6120 ccvt = ix86_get_callcvt (funtype);
6122 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6123 | IX86_CALLCVT_THISCALL)) != 0
6124 && ! stdarg_p (funtype))
6125 return size;
6127 /* Lose any fake structure return argument if it is passed on the stack. */
6128 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6129 && !ix86_keep_aggregate_return_pointer (funtype))
6131 int nregs = ix86_function_regparm (funtype, fundecl);
6132 if (nregs == 0)
6133 return GET_MODE_SIZE (Pmode);
6136 return 0;
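/* Editorial illustration: a 32-bit stdcall (or fastcall/thiscall) function
   with a fixed argument list and 8 bytes of stack arguments returns 8 here,
   so the callee pops its own arguments; a cdecl or stdarg function returns 0
   and the caller does the popping.  */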
6139 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
6141 static bool
6142 ix86_legitimate_combined_insn (rtx_insn *insn)
6144 /* Check operand constraints in case hard registers were propagated
6145 into insn pattern. This check prevents combine pass from
6146 generating insn patterns with invalid hard register operands.
6147 These invalid insns can eventually confuse reload to error out
6148 with a spill failure. See also PRs 46829 and 46843. */
6149 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6151 int i;
6153 extract_insn (insn);
6154 preprocess_constraints (insn);
6156 int n_operands = recog_data.n_operands;
6157 int n_alternatives = recog_data.n_alternatives;
6158 for (i = 0; i < n_operands; i++)
6160 rtx op = recog_data.operand[i];
6161 machine_mode mode = GET_MODE (op);
6162 const operand_alternative *op_alt;
6163 int offset = 0;
6164 bool win;
6165 int j;
6167 /* For pre-AVX disallow unaligned loads/stores where the
6168 instructions don't support it. */
6169 if (!TARGET_AVX
6170 && VECTOR_MODE_P (GET_MODE (op))
6171 && misaligned_operand (op, GET_MODE (op)))
6173 int min_align = get_attr_ssememalign (insn);
6174 if (min_align == 0)
6175 return false;
6178 /* A unary operator may be accepted by the predicate, but it
6179 is irrelevant for matching constraints. */
6180 if (UNARY_P (op))
6181 op = XEXP (op, 0);
6183 if (GET_CODE (op) == SUBREG)
6185 if (REG_P (SUBREG_REG (op))
6186 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6187 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6188 GET_MODE (SUBREG_REG (op)),
6189 SUBREG_BYTE (op),
6190 GET_MODE (op));
6191 op = SUBREG_REG (op);
6194 if (!(REG_P (op) && HARD_REGISTER_P (op)))
6195 continue;
6197 op_alt = recog_op_alt;
6199 /* Operand has no constraints, anything is OK. */
6200 win = !n_alternatives;
6202 alternative_mask preferred = get_preferred_alternatives (insn);
6203 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6205 if (!TEST_BIT (preferred, j))
6206 continue;
6207 if (op_alt[i].anything_ok
6208 || (op_alt[i].matches != -1
6209 && operands_match_p
6210 (recog_data.operand[i],
6211 recog_data.operand[op_alt[i].matches]))
6212 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6214 win = true;
6215 break;
6219 if (!win)
6220 return false;
6224 return true;
6227 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
6229 static unsigned HOST_WIDE_INT
6230 ix86_asan_shadow_offset (void)
6232 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6233 : HOST_WIDE_INT_C (0x7fff8000))
6234 : (HOST_WIDE_INT_1 << 29);
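/* General AddressSanitizer background (not specific to this file): the
   runtime computes shadow = (address >> 3) + offset, so the values above are
   the constant term of that mapping for LP64 Mach-O, other LP64 targets and
   32-bit/x32 targets respectively.  */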
6237 /* Argument support functions. */
6239 /* Return true when register may be used to pass function parameters. */
6240 bool
6241 ix86_function_arg_regno_p (int regno)
6243 int i;
6244 enum calling_abi call_abi;
6245 const int *parm_regs;
6247 if (TARGET_MPX && BND_REGNO_P (regno))
6248 return true;
6250 if (!TARGET_64BIT)
6252 if (TARGET_MACHO)
6253 return (regno < REGPARM_MAX
6254 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6255 else
6256 return (regno < REGPARM_MAX
6257 || (TARGET_MMX && MMX_REGNO_P (regno)
6258 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6259 || (TARGET_SSE && SSE_REGNO_P (regno)
6260 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6263 if (TARGET_SSE && SSE_REGNO_P (regno)
6264 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6265 return true;
6267 /* TODO: The function should depend on current function ABI but
6268 builtins.c would need updating then. Therefore we use the
6269 default ABI. */
6270 call_abi = ix86_cfun_abi ();
6272 /* RAX is used as hidden argument to va_arg functions. */
6273 if (call_abi == SYSV_ABI && regno == AX_REG)
6274 return true;
6276 if (call_abi == MS_ABI)
6277 parm_regs = x86_64_ms_abi_int_parameter_registers;
6278 else
6279 parm_regs = x86_64_int_parameter_registers;
6281 for (i = 0; i < (call_abi == MS_ABI
6282 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6283 if (regno == parm_regs[i])
6284 return true;
6285 return false;
6288 /* Return if we do not know how to pass TYPE solely in registers. */
6290 static bool
6291 ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6293 if (must_pass_in_stack_var_size_or_pad (mode, type))
6294 return true;
6296 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6297 The layout_type routine is crafty and tries to trick us into passing
6298 currently unsupported vector types on the stack by using TImode. */
6299 return (!TARGET_64BIT && mode == TImode
6300 && type && TREE_CODE (type) != VECTOR_TYPE);
6303 /* Return the size, in bytes, of the area reserved for arguments passed
6304 in registers for the function represented by FNDECL, depending on the
6305 ABI used. */
6306 int
6307 ix86_reg_parm_stack_space (const_tree fndecl)
6309 enum calling_abi call_abi = SYSV_ABI;
6310 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6311 call_abi = ix86_function_abi (fndecl);
6312 else
6313 call_abi = ix86_function_type_abi (fndecl);
6314 if (TARGET_64BIT && call_abi == MS_ABI)
6315 return 32;
6316 return 0;
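/* Background (standard Microsoft x64 calling convention, not taken from this
   file): the 32 bytes above are the four 8-byte "home" slots the caller
   reserves for the register arguments passed in RCX, RDX, R8 and R9.  */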
6319 /* We add this as a workaround in order to use libc_has_function
6320 hook in i386.md. */
6321 bool
6322 ix86_libc_has_function (enum function_class fn_class)
6324 return targetm.libc_has_function (fn_class);
6327 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
6328 specifying the call ABI used. */
6329 enum calling_abi
6330 ix86_function_type_abi (const_tree fntype)
6332 enum calling_abi abi = ix86_abi;
6334 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
6335 return abi;
6337 if (abi == SYSV_ABI
6338 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6340 if (TARGET_X32)
6341 error ("X32 does not support ms_abi attribute");
6343 abi = MS_ABI;
6345 else if (abi == MS_ABI
6346 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6347 abi = SYSV_ABI;
6349 return abi;
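/* Example of the attributes checked above (user code, not part of this
   file): a translation unit compiled for one ABI can declare callees that
   use the other one:

     int win_like (int, int) __attribute__ ((ms_abi));
     int unix_like (int, int) __attribute__ ((sysv_abi));

   ix86_function_type_abi returns MS_ABI for the first type and SYSV_ABI
   for the second.  */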
6352 static enum calling_abi
6353 ix86_function_abi (const_tree fndecl)
6355 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
6358 /* Return SYSV_ABI or MS_ABI, depending on CFUN,
6359 specifying the call ABI used. */
6360 enum calling_abi
6361 ix86_cfun_abi (void)
6363 return cfun ? cfun->machine->call_abi : ix86_abi;
6366 static bool
6367 ix86_function_ms_hook_prologue (const_tree fn)
6369 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6371 if (decl_function_context (fn) != NULL_TREE)
6372 error_at (DECL_SOURCE_LOCATION (fn),
6373 "ms_hook_prologue is not compatible with nested function");
6374 else
6375 return true;
6377 return false;
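/* Example of the attribute tested above (illustrative user code):

     void callback (void) __attribute__ ((ms_hook_prologue));

   Such a function gets the hot-patchable filler and prologue bytes emitted
   by ix86_asm_output_function_label below.  */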
6380 /* Write the extra assembler code needed to declare a function properly. */
6382 void
6383 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6384 tree decl)
6386 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6388 if (is_ms_hook)
6390 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6391 unsigned int filler_cc = 0xcccccccc;
6393 for (i = 0; i < filler_count; i += 4)
6394 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6397 #ifdef SUBTARGET_ASM_UNWIND_INIT
6398 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6399 #endif
6401 ASM_OUTPUT_LABEL (asm_out_file, fname);
6403 /* Output magic byte marker, if hot-patch attribute is set. */
6404 if (is_ms_hook)
6406 if (TARGET_64BIT)
6408 /* leaq [%rsp + 0], %rsp */
6409 asm_fprintf (asm_out_file, ASM_BYTE
6410 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6412 else
6414 /* movl.s %edi, %edi
6415 push %ebp
6416 movl.s %esp, %ebp */
6417 asm_fprintf (asm_out_file, ASM_BYTE
6418 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6423 /* regclass.c */
6424 extern void init_regs (void);
6426 /* Implementation of the call ABI switching target hook. The call
6427 register sets specific to FNDECL are set up. See also
6428 ix86_conditional_register_usage for more details. */
6429 void
6430 ix86_call_abi_override (const_tree fndecl)
6432 cfun->machine->call_abi = ix86_function_abi (fndecl);
6435 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6436 Avoid expensive re-initialization of init_regs each time we switch function
6437 context, since this is needed only during RTL expansion. */
6438 static void
6439 ix86_maybe_switch_abi (void)
6441 if (TARGET_64BIT &&
6442 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6443 reinit_regs ();
6446 /* Return true if a pseudo register should be created and used to hold
6447 the GOT address for PIC code. */
6448 bool
6449 ix86_use_pseudo_pic_reg (void)
6451 if ((TARGET_64BIT
6452 && (ix86_cmodel == CM_SMALL_PIC
6453 || TARGET_PECOFF))
6454 || !flag_pic)
6455 return false;
6456 return true;
6459 /* Initialize large model PIC register. */
6461 static void
6462 ix86_init_large_pic_reg (unsigned int tmp_regno)
6464 rtx_code_label *label;
6465 rtx tmp_reg;
6467 gcc_assert (Pmode == DImode);
6468 label = gen_label_rtx ();
6469 emit_label (label);
6470 LABEL_PRESERVE_P (label) = 1;
6471 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6472 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6473 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6474 label));
6475 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6476 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6477 pic_offset_table_rtx, tmp_reg));
6480 /* Create and initialize PIC register if required. */
6481 static void
6482 ix86_init_pic_reg (void)
6484 edge entry_edge;
6485 rtx_insn *seq;
6487 if (!ix86_use_pseudo_pic_reg ())
6488 return;
6490 start_sequence ();
6492 if (TARGET_64BIT)
6494 if (ix86_cmodel == CM_LARGE_PIC)
6495 ix86_init_large_pic_reg (R11_REG);
6496 else
6497 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6499 else
6501 /* If there will be an mcount call in the function, it is more profitable
6502 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
6503 rtx reg = crtl->profile
6504 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6505 : pic_offset_table_rtx;
6506 rtx_insn *insn = emit_insn (gen_set_got (reg));
6507 RTX_FRAME_RELATED_P (insn) = 1;
6508 if (crtl->profile)
6509 emit_move_insn (pic_offset_table_rtx, reg);
6510 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6513 seq = get_insns ();
6514 end_sequence ();
6516 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6517 insert_insn_on_edge (seq, entry_edge);
6518 commit_one_edge_insertion (entry_edge);
6521 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6522 for a call to a function whose data type is FNTYPE.
6523 For a library call, FNTYPE is 0. */
6525 void
6526 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6527 tree fntype, /* tree ptr for function decl */
6528 rtx libname, /* SYMBOL_REF of library name or 0 */
6529 tree fndecl,
6530 int caller)
6532 struct cgraph_local_info *i = NULL;
6533 struct cgraph_node *target = NULL;
6535 memset (cum, 0, sizeof (*cum));
6537 if (fndecl)
6539 target = cgraph_node::get (fndecl);
6540 if (target)
6542 target = target->function_symbol ();
6543 i = cgraph_node::local_info (target->decl);
6544 cum->call_abi = ix86_function_abi (target->decl);
6546 else
6547 cum->call_abi = ix86_function_abi (fndecl);
6549 else
6550 cum->call_abi = ix86_function_type_abi (fntype);
6552 cum->caller = caller;
6554 /* Set up the number of registers to use for passing arguments. */
6555 cum->nregs = ix86_regparm;
6556 if (TARGET_64BIT)
6558 cum->nregs = (cum->call_abi == SYSV_ABI
6559 ? X86_64_REGPARM_MAX
6560 : X86_64_MS_REGPARM_MAX);
6562 if (TARGET_SSE)
6564 cum->sse_nregs = SSE_REGPARM_MAX;
6565 if (TARGET_64BIT)
6567 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6568 ? X86_64_SSE_REGPARM_MAX
6569 : X86_64_MS_SSE_REGPARM_MAX);
6572 if (TARGET_MMX)
6573 cum->mmx_nregs = MMX_REGPARM_MAX;
6574 cum->warn_avx512f = true;
6575 cum->warn_avx = true;
6576 cum->warn_sse = true;
6577 cum->warn_mmx = true;
6579 /* Because the type might mismatch between caller and callee, we need to
6580 use the actual type of the function for local calls.
6581 FIXME: cgraph_analyze can be told to actually record whether a function uses
6582 va_start, so for local functions maybe_vaarg can be made aggressive,
6583 helping K&R code.
6584 FIXME: once the type system is fixed, we won't need this code anymore. */
6585 if (i && i->local && i->can_change_signature)
6586 fntype = TREE_TYPE (target->decl);
6587 cum->stdarg = stdarg_p (fntype);
6588 cum->maybe_vaarg = (fntype
6589 ? (!prototype_p (fntype) || stdarg_p (fntype))
6590 : !libname);
6592 cum->bnd_regno = FIRST_BND_REG;
6593 cum->bnds_in_bt = 0;
6594 cum->force_bnd_pass = 0;
6595 cum->decl = fndecl;
6597 if (!TARGET_64BIT)
6599 /* If there are variable arguments, then we won't pass anything
6600 in registers in 32-bit mode. */
6601 if (stdarg_p (fntype))
6603 cum->nregs = 0;
6604 /* Since in 32-bit mode variable arguments are always passed on the
6605 stack, there is a scratch register available for an indirect
6606 sibcall. */
6607 cfun->machine->arg_reg_available = true;
6608 cum->sse_nregs = 0;
6609 cum->mmx_nregs = 0;
6610 cum->warn_avx512f = false;
6611 cum->warn_avx = false;
6612 cum->warn_sse = false;
6613 cum->warn_mmx = false;
6614 return;
6617 /* Use ecx and edx registers if function has fastcall attribute,
6618 else look for regparm information. */
6619 if (fntype)
6621 unsigned int ccvt = ix86_get_callcvt (fntype);
6622 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6624 cum->nregs = 1;
6625 cum->fastcall = 1; /* Same first register as in fastcall. */
6627 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6629 cum->nregs = 2;
6630 cum->fastcall = 1;
6632 else
6633 cum->nregs = ix86_function_regparm (fntype, fndecl);
6636 /* Set up the number of SSE registers used for passing SFmode
6637 and DFmode arguments. Warn for mismatching ABI. */
6638 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6641 cfun->machine->arg_reg_available = (cum->nregs > 0);
6644 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6645 But in the case of vector types, it is some vector mode.
6647 When we have only some of our vector isa extensions enabled, then there
6648 are some modes for which vector_mode_supported_p is false. For these
6649 modes, the generic vector support in gcc will choose some non-vector mode
6650 in order to implement the type. By computing the natural mode, we'll
6651 select the proper ABI location for the operand and not depend on whatever
6652 the middle-end decides to do with these vector types.
6654 The middle-end can't deal with vector types larger than 16 bytes. In this
6655 case, we return the original mode and warn about the ABI change if CUM isn't
6656 NULL.
6658 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6659 available for the function return value. */
6661 static machine_mode
6662 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6663 bool in_return)
6665 machine_mode mode = TYPE_MODE (type);
6667 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6669 HOST_WIDE_INT size = int_size_in_bytes (type);
6670 if ((size == 8 || size == 16 || size == 32 || size == 64)
6671 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6672 && TYPE_VECTOR_SUBPARTS (type) > 1)
6674 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6676 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6677 mode = MIN_MODE_VECTOR_FLOAT;
6678 else
6679 mode = MIN_MODE_VECTOR_INT;
6681 /* Get the mode which has this inner mode and number of units. */
6682 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6683 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6684 && GET_MODE_INNER (mode) == innermode)
6686 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
6688 static bool warnedavx512f;
6689 static bool warnedavx512f_ret;
6691 if (cum && cum->warn_avx512f && !warnedavx512f)
6693 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6694 "without AVX512F enabled changes the ABI"))
6695 warnedavx512f = true;
6697 else if (in_return && !warnedavx512f_ret)
6699 if (warning (OPT_Wpsabi, "AVX512F vector return "
6700 "without AVX512F enabled changes the ABI"))
6701 warnedavx512f_ret = true;
6704 return TYPE_MODE (type);
6706 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
6708 static bool warnedavx;
6709 static bool warnedavx_ret;
6711 if (cum && cum->warn_avx && !warnedavx)
6713 if (warning (OPT_Wpsabi, "AVX vector argument "
6714 "without AVX enabled changes the ABI"))
6715 warnedavx = true;
6717 else if (in_return && !warnedavx_ret)
6719 if (warning (OPT_Wpsabi, "AVX vector return "
6720 "without AVX enabled changes the ABI"))
6721 warnedavx_ret = true;
6724 return TYPE_MODE (type);
6726 else if (((size == 8 && TARGET_64BIT) || size == 16)
6727 && !TARGET_SSE
6728 && !TARGET_IAMCU)
6730 static bool warnedsse;
6731 static bool warnedsse_ret;
6733 if (cum && cum->warn_sse && !warnedsse)
6735 if (warning (OPT_Wpsabi, "SSE vector argument "
6736 "without SSE enabled changes the ABI"))
6737 warnedsse = true;
6739 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6741 if (warning (OPT_Wpsabi, "SSE vector return "
6742 "without SSE enabled changes the ABI"))
6743 warnedsse_ret = true;
6746 else if ((size == 8 && !TARGET_64BIT)
6747 && !TARGET_MMX
6748 && !TARGET_IAMCU)
6750 static bool warnedmmx;
6751 static bool warnedmmx_ret;
6753 if (cum && cum->warn_mmx && !warnedmmx)
6755 if (warning (OPT_Wpsabi, "MMX vector argument "
6756 "without MMX enabled changes the ABI"))
6757 warnedmmx = true;
6759 else if (in_return && !warnedmmx_ret)
6761 if (warning (OPT_Wpsabi, "MMX vector return "
6762 "without MMX enabled changes the ABI"))
6763 warnedmmx_ret = true;
6766 return mode;
6769 gcc_unreachable ();
6773 return mode;
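/* Illustrative example of the -Wpsabi warnings above (user code, assuming a
   32-byte vector compiled without -mavx):

     typedef int v8si __attribute__ ((vector_size (32)));
     v8si add (v8si a, v8si b) { return a + b; }   // warns: ABI changes
                                                   // without AVX enabled

   With the ISA extension disabled, the original TYPE_MODE is returned and
   the value ends up being passed differently (typically in memory), hence
   the ABI-change warning.  */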
6776 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6777 this may not agree with the mode that the type system has chosen for the
6778 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6779 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6781 static rtx
6782 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6783 unsigned int regno)
6785 rtx tmp;
6787 if (orig_mode != BLKmode)
6788 tmp = gen_rtx_REG (orig_mode, regno);
6789 else
6791 tmp = gen_rtx_REG (mode, regno);
6792 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6793 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6796 return tmp;
6799 /* x86-64 register passing implementation. See the x86-64 psABI for details.
6800 The goal of this code is to classify each 8 bytes of an incoming argument by
6801 register class and assign registers accordingly. */
6803 /* Return the union class of CLASS1 and CLASS2.
6804 See the x86-64 PS ABI for details. */
6806 static enum x86_64_reg_class
6807 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6809 /* Rule #1: If both classes are equal, this is the resulting class. */
6810 if (class1 == class2)
6811 return class1;
6813 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6814 the other class. */
6815 if (class1 == X86_64_NO_CLASS)
6816 return class2;
6817 if (class2 == X86_64_NO_CLASS)
6818 return class1;
6820 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6821 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6822 return X86_64_MEMORY_CLASS;
6824 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6825 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6826 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6827 return X86_64_INTEGERSI_CLASS;
6828 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6829 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6830 return X86_64_INTEGER_CLASS;
6832 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6833 MEMORY is used. */
6834 if (class1 == X86_64_X87_CLASS
6835 || class1 == X86_64_X87UP_CLASS
6836 || class1 == X86_64_COMPLEX_X87_CLASS
6837 || class2 == X86_64_X87_CLASS
6838 || class2 == X86_64_X87UP_CLASS
6839 || class2 == X86_64_COMPLEX_X87_CLASS)
6840 return X86_64_MEMORY_CLASS;
6842 /* Rule #6: Otherwise class SSE is used. */
6843 return X86_64_SSE_CLASS;
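/* A worked example of the rules above (illustrative, following the psABI):
   for

     struct s { int i; float f; };   // 8 bytes, a single eightbyte

   the int classifies as INTEGERSI and the float as SSESF; rule #4 merges
   them to INTEGERSI, so the whole struct travels in one integer register.  */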
6846 /* Classify the argument of type TYPE and mode MODE.
6847 CLASSES will be filled by the register class used to pass each word
6848 of the operand. The number of words is returned. In case the parameter
6849 should be passed in memory, 0 is returned. As a special case for zero
6850 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6852 BIT_OFFSET is used internally for handling records; it specifies the
6853 offset in bits modulo 512 to avoid overflow cases.
6855 See the x86-64 PS ABI for details.
6858 static int
6859 classify_argument (machine_mode mode, const_tree type,
6860 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6862 HOST_WIDE_INT bytes =
6863 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6864 int words
6865 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6867 /* Variable sized entities are always passed/returned in memory. */
6868 if (bytes < 0)
6869 return 0;
6871 if (mode != VOIDmode
6872 && targetm.calls.must_pass_in_stack (mode, type))
6873 return 0;
6875 if (type && AGGREGATE_TYPE_P (type))
6877 int i;
6878 tree field;
6879 enum x86_64_reg_class subclasses[MAX_CLASSES];
6881 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6882 if (bytes > 64)
6883 return 0;
6885 for (i = 0; i < words; i++)
6886 classes[i] = X86_64_NO_CLASS;
6888 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6889 signal memory class, so handle it as a special case. */
6890 if (!words)
6892 classes[0] = X86_64_NO_CLASS;
6893 return 1;
6896 /* Classify each field of record and merge classes. */
6897 switch (TREE_CODE (type))
6899 case RECORD_TYPE:
6900 /* Now merge the fields of the structure. */
6901 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6903 if (TREE_CODE (field) == FIELD_DECL)
6905 int num;
6907 if (TREE_TYPE (field) == error_mark_node)
6908 continue;
6910 /* Bitfields are always classified as integer. Handle them
6911 early, since later code would consider them to be
6912 misaligned integers. */
6913 if (DECL_BIT_FIELD (field))
6915 for (i = (int_bit_position (field)
6916 + (bit_offset % 64)) / 8 / 8;
6917 i < ((int_bit_position (field) + (bit_offset % 64))
6918 + tree_to_shwi (DECL_SIZE (field))
6919 + 63) / 8 / 8; i++)
6920 classes[i] =
6921 merge_classes (X86_64_INTEGER_CLASS,
6922 classes[i]);
6924 else
6926 int pos;
6928 type = TREE_TYPE (field);
6930 /* Flexible array member is ignored. */
6931 if (TYPE_MODE (type) == BLKmode
6932 && TREE_CODE (type) == ARRAY_TYPE
6933 && TYPE_SIZE (type) == NULL_TREE
6934 && TYPE_DOMAIN (type) != NULL_TREE
6935 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6936 == NULL_TREE))
6938 static bool warned;
6940 if (!warned && warn_psabi)
6942 warned = true;
6943 inform (input_location,
6944 "the ABI of passing struct with"
6945 " a flexible array member has"
6946 " changed in GCC 4.4");
6948 continue;
6950 num = classify_argument (TYPE_MODE (type), type,
6951 subclasses,
6952 (int_bit_position (field)
6953 + bit_offset) % 512);
6954 if (!num)
6955 return 0;
6956 pos = (int_bit_position (field)
6957 + (bit_offset % 64)) / 8 / 8;
6958 for (i = 0; i < num && (i + pos) < words; i++)
6959 classes[i + pos] =
6960 merge_classes (subclasses[i], classes[i + pos]);
6964 break;
6966 case ARRAY_TYPE:
6967 /* Arrays are handled as small records. */
6969 int num;
6970 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6971 TREE_TYPE (type), subclasses, bit_offset);
6972 if (!num)
6973 return 0;
6975 /* The partial classes are now full classes. */
6976 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6977 subclasses[0] = X86_64_SSE_CLASS;
6978 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6979 && !((bit_offset % 64) == 0 && bytes == 4))
6980 subclasses[0] = X86_64_INTEGER_CLASS;
6982 for (i = 0; i < words; i++)
6983 classes[i] = subclasses[i % num];
6985 break;
6987 case UNION_TYPE:
6988 case QUAL_UNION_TYPE:
6989 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6991 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6993 if (TREE_CODE (field) == FIELD_DECL)
6995 int num;
6997 if (TREE_TYPE (field) == error_mark_node)
6998 continue;
7000 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
7001 TREE_TYPE (field), subclasses,
7002 bit_offset);
7003 if (!num)
7004 return 0;
7005 for (i = 0; i < num && i < words; i++)
7006 classes[i] = merge_classes (subclasses[i], classes[i]);
7009 break;
7011 default:
7012 gcc_unreachable ();
7015 if (words > 2)
7017 /* When the size is > 16 bytes, if the first class isn't
7018 X86_64_SSE_CLASS or any of the remaining classes isn't
7019 X86_64_SSEUP_CLASS, everything should be passed in
7020 memory. */
7021 if (classes[0] != X86_64_SSE_CLASS)
7022 return 0;
7024 for (i = 1; i < words; i++)
7025 if (classes[i] != X86_64_SSEUP_CLASS)
7026 return 0;
7029 /* Final merger cleanup. */
7030 for (i = 0; i < words; i++)
7032 /* If one class is MEMORY, everything should be passed in
7033 memory. */
7034 if (classes[i] == X86_64_MEMORY_CLASS)
7035 return 0;
7037 /* X86_64_SSEUP_CLASS should always be preceded by
7038 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
7039 if (classes[i] == X86_64_SSEUP_CLASS
7040 && classes[i - 1] != X86_64_SSE_CLASS
7041 && classes[i - 1] != X86_64_SSEUP_CLASS)
7043 /* The first one should never be X86_64_SSEUP_CLASS. */
7044 gcc_assert (i != 0);
7045 classes[i] = X86_64_SSE_CLASS;
7048 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
7049 everything should be passed in memory. */
7050 if (classes[i] == X86_64_X87UP_CLASS
7051 && (classes[i - 1] != X86_64_X87_CLASS))
7053 static bool warned;
7055 /* The first one should never be X86_64_X87UP_CLASS. */
7056 gcc_assert (i != 0);
7057 if (!warned && warn_psabi)
7059 warned = true;
7060 inform (input_location,
7061 "the ABI of passing union with long double"
7062 " has changed in GCC 4.4");
7064 return 0;
7067 return words;
7070 /* Compute the alignment needed. We align all types to their natural
7071 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
7072 if (mode != VOIDmode && mode != BLKmode)
7074 int mode_alignment = GET_MODE_BITSIZE (mode);
7076 if (mode == XFmode)
7077 mode_alignment = 128;
7078 else if (mode == XCmode)
7079 mode_alignment = 256;
7080 if (COMPLEX_MODE_P (mode))
7081 mode_alignment /= 2;
7082 /* Misaligned fields are always returned in memory. */
7083 if (bit_offset % mode_alignment)
7084 return 0;
7087 /* For V1xx modes, just use the base mode. */
7088 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7089 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
7090 mode = GET_MODE_INNER (mode);
7092 /* Classification of atomic types. */
7093 switch (mode)
7095 case SDmode:
7096 case DDmode:
7097 classes[0] = X86_64_SSE_CLASS;
7098 return 1;
7099 case TDmode:
7100 classes[0] = X86_64_SSE_CLASS;
7101 classes[1] = X86_64_SSEUP_CLASS;
7102 return 2;
7103 case DImode:
7104 case SImode:
7105 case HImode:
7106 case QImode:
7107 case CSImode:
7108 case CHImode:
7109 case CQImode:
7111 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7113 /* Analyze last 128 bits only. */
7114 size = (size - 1) & 0x7f;
7116 if (size < 32)
7118 classes[0] = X86_64_INTEGERSI_CLASS;
7119 return 1;
7121 else if (size < 64)
7123 classes[0] = X86_64_INTEGER_CLASS;
7124 return 1;
7126 else if (size < 64+32)
7128 classes[0] = X86_64_INTEGER_CLASS;
7129 classes[1] = X86_64_INTEGERSI_CLASS;
7130 return 2;
7132 else if (size < 64+64)
7134 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7135 return 2;
7137 else
7138 gcc_unreachable ();
7140 case CDImode:
7141 case TImode:
7142 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7143 return 2;
7144 case COImode:
7145 case OImode:
7146 /* OImode shouldn't be used directly. */
7147 gcc_unreachable ();
7148 case CTImode:
7149 return 0;
7150 case SFmode:
7151 if (!(bit_offset % 64))
7152 classes[0] = X86_64_SSESF_CLASS;
7153 else
7154 classes[0] = X86_64_SSE_CLASS;
7155 return 1;
7156 case DFmode:
7157 classes[0] = X86_64_SSEDF_CLASS;
7158 return 1;
7159 case XFmode:
7160 classes[0] = X86_64_X87_CLASS;
7161 classes[1] = X86_64_X87UP_CLASS;
7162 return 2;
7163 case TFmode:
7164 classes[0] = X86_64_SSE_CLASS;
7165 classes[1] = X86_64_SSEUP_CLASS;
7166 return 2;
7167 case SCmode:
7168 classes[0] = X86_64_SSE_CLASS;
7169 if (!(bit_offset % 64))
7170 return 1;
7171 else
7173 static bool warned;
7175 if (!warned && warn_psabi)
7177 warned = true;
7178 inform (input_location,
7179 "the ABI of passing structure with complex float"
7180 " member has changed in GCC 4.4");
7182 classes[1] = X86_64_SSESF_CLASS;
7183 return 2;
7185 case DCmode:
7186 classes[0] = X86_64_SSEDF_CLASS;
7187 classes[1] = X86_64_SSEDF_CLASS;
7188 return 2;
7189 case XCmode:
7190 classes[0] = X86_64_COMPLEX_X87_CLASS;
7191 return 1;
7192 case TCmode:
7193 /* This mode is larger than 16 bytes. */
7194 return 0;
7195 case V8SFmode:
7196 case V8SImode:
7197 case V32QImode:
7198 case V16HImode:
7199 case V4DFmode:
7200 case V4DImode:
7201 classes[0] = X86_64_SSE_CLASS;
7202 classes[1] = X86_64_SSEUP_CLASS;
7203 classes[2] = X86_64_SSEUP_CLASS;
7204 classes[3] = X86_64_SSEUP_CLASS;
7205 return 4;
7206 case V8DFmode:
7207 case V16SFmode:
7208 case V8DImode:
7209 case V16SImode:
7210 case V32HImode:
7211 case V64QImode:
7212 classes[0] = X86_64_SSE_CLASS;
7213 classes[1] = X86_64_SSEUP_CLASS;
7214 classes[2] = X86_64_SSEUP_CLASS;
7215 classes[3] = X86_64_SSEUP_CLASS;
7216 classes[4] = X86_64_SSEUP_CLASS;
7217 classes[5] = X86_64_SSEUP_CLASS;
7218 classes[6] = X86_64_SSEUP_CLASS;
7219 classes[7] = X86_64_SSEUP_CLASS;
7220 return 8;
7221 case V4SFmode:
7222 case V4SImode:
7223 case V16QImode:
7224 case V8HImode:
7225 case V2DFmode:
7226 case V2DImode:
7227 classes[0] = X86_64_SSE_CLASS;
7228 classes[1] = X86_64_SSEUP_CLASS;
7229 return 2;
7230 case V1TImode:
7231 case V1DImode:
7232 case V2SFmode:
7233 case V2SImode:
7234 case V4HImode:
7235 case V8QImode:
7236 classes[0] = X86_64_SSE_CLASS;
7237 return 1;
7238 case BLKmode:
7239 case VOIDmode:
7240 return 0;
7241 default:
7242 gcc_assert (VECTOR_MODE_P (mode));
7244 if (bytes > 16)
7245 return 0;
7247 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7249 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7250 classes[0] = X86_64_INTEGERSI_CLASS;
7251 else
7252 classes[0] = X86_64_INTEGER_CLASS;
7253 classes[1] = X86_64_INTEGER_CLASS;
7254 return 1 + (bytes > 8);
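/* Worked example for the classification above (illustrative):

     struct p { double x; double y; };   // 16 bytes, two eightbytes

   classify_argument fills classes[0] = classes[1] = X86_64_SSEDF_CLASS and
   returns 2, so as an argument the struct is split across two SSE registers
   (e.g. %xmm0 and %xmm1).  */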
7258 /* Examine the argument and set the number of registers required in each
7259 class. Return true iff the parameter should be passed in memory. */
7261 static bool
7262 examine_argument (machine_mode mode, const_tree type, int in_return,
7263 int *int_nregs, int *sse_nregs)
7265 enum x86_64_reg_class regclass[MAX_CLASSES];
7266 int n = classify_argument (mode, type, regclass, 0);
7268 *int_nregs = 0;
7269 *sse_nregs = 0;
7271 if (!n)
7272 return true;
7273 for (n--; n >= 0; n--)
7274 switch (regclass[n])
7276 case X86_64_INTEGER_CLASS:
7277 case X86_64_INTEGERSI_CLASS:
7278 (*int_nregs)++;
7279 break;
7280 case X86_64_SSE_CLASS:
7281 case X86_64_SSESF_CLASS:
7282 case X86_64_SSEDF_CLASS:
7283 (*sse_nregs)++;
7284 break;
7285 case X86_64_NO_CLASS:
7286 case X86_64_SSEUP_CLASS:
7287 break;
7288 case X86_64_X87_CLASS:
7289 case X86_64_X87UP_CLASS:
7290 case X86_64_COMPLEX_X87_CLASS:
7291 if (!in_return)
7292 return true;
7293 break;
7294 case X86_64_MEMORY_CLASS:
7295 gcc_unreachable ();
7298 return false;
7301 /* Construct container for the argument used by GCC interface. See
7302 FUNCTION_ARG for the detailed description. */
7304 static rtx
7305 construct_container (machine_mode mode, machine_mode orig_mode,
7306 const_tree type, int in_return, int nintregs, int nsseregs,
7307 const int *intreg, int sse_regno)
7309 /* The following variables hold the static issued_error state. */
7310 static bool issued_sse_arg_error;
7311 static bool issued_sse_ret_error;
7312 static bool issued_x87_ret_error;
7314 machine_mode tmpmode;
7315 int bytes =
7316 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7317 enum x86_64_reg_class regclass[MAX_CLASSES];
7318 int n;
7319 int i;
7320 int nexps = 0;
7321 int needed_sseregs, needed_intregs;
7322 rtx exp[MAX_CLASSES];
7323 rtx ret;
7325 n = classify_argument (mode, type, regclass, 0);
7326 if (!n)
7327 return NULL;
7328 if (examine_argument (mode, type, in_return, &needed_intregs,
7329 &needed_sseregs))
7330 return NULL;
7331 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7332 return NULL;
7334 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7335 some less clueful developer tries to use floating-point anyway. */
7336 if (needed_sseregs && !TARGET_SSE)
7338 if (in_return)
7340 if (!issued_sse_ret_error)
7342 error ("SSE register return with SSE disabled");
7343 issued_sse_ret_error = true;
7346 else if (!issued_sse_arg_error)
7348 error ("SSE register argument with SSE disabled");
7349 issued_sse_arg_error = true;
7351 return NULL;
7354 /* Likewise, error if the ABI requires us to return values in the
7355 x87 registers and the user specified -mno-80387. */
7356 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7357 for (i = 0; i < n; i++)
7358 if (regclass[i] == X86_64_X87_CLASS
7359 || regclass[i] == X86_64_X87UP_CLASS
7360 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7362 if (!issued_x87_ret_error)
7364 error ("x87 register return with x87 disabled");
7365 issued_x87_ret_error = true;
7367 return NULL;
7370 /* First construct the simple cases. Avoid SCmode, since we want to use
7371 a single register to pass this type. */
7372 if (n == 1 && mode != SCmode)
7373 switch (regclass[0])
7375 case X86_64_INTEGER_CLASS:
7376 case X86_64_INTEGERSI_CLASS:
7377 return gen_rtx_REG (mode, intreg[0]);
7378 case X86_64_SSE_CLASS:
7379 case X86_64_SSESF_CLASS:
7380 case X86_64_SSEDF_CLASS:
7381 if (mode != BLKmode)
7382 return gen_reg_or_parallel (mode, orig_mode,
7383 SSE_REGNO (sse_regno));
7384 break;
7385 case X86_64_X87_CLASS:
7386 case X86_64_COMPLEX_X87_CLASS:
7387 return gen_rtx_REG (mode, FIRST_STACK_REG);
7388 case X86_64_NO_CLASS:
7389 /* Zero sized array, struct or class. */
7390 return NULL;
7391 default:
7392 gcc_unreachable ();
7394 if (n == 2
7395 && regclass[0] == X86_64_SSE_CLASS
7396 && regclass[1] == X86_64_SSEUP_CLASS
7397 && mode != BLKmode)
7398 return gen_reg_or_parallel (mode, orig_mode,
7399 SSE_REGNO (sse_regno));
7400 if (n == 4
7401 && regclass[0] == X86_64_SSE_CLASS
7402 && regclass[1] == X86_64_SSEUP_CLASS
7403 && regclass[2] == X86_64_SSEUP_CLASS
7404 && regclass[3] == X86_64_SSEUP_CLASS
7405 && mode != BLKmode)
7406 return gen_reg_or_parallel (mode, orig_mode,
7407 SSE_REGNO (sse_regno));
7408 if (n == 8
7409 && regclass[0] == X86_64_SSE_CLASS
7410 && regclass[1] == X86_64_SSEUP_CLASS
7411 && regclass[2] == X86_64_SSEUP_CLASS
7412 && regclass[3] == X86_64_SSEUP_CLASS
7413 && regclass[4] == X86_64_SSEUP_CLASS
7414 && regclass[5] == X86_64_SSEUP_CLASS
7415 && regclass[6] == X86_64_SSEUP_CLASS
7416 && regclass[7] == X86_64_SSEUP_CLASS
7417 && mode != BLKmode)
7418 return gen_reg_or_parallel (mode, orig_mode,
7419 SSE_REGNO (sse_regno));
7420 if (n == 2
7421 && regclass[0] == X86_64_X87_CLASS
7422 && regclass[1] == X86_64_X87UP_CLASS)
7423 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7425 if (n == 2
7426 && regclass[0] == X86_64_INTEGER_CLASS
7427 && regclass[1] == X86_64_INTEGER_CLASS
7428 && (mode == CDImode || mode == TImode)
7429 && intreg[0] + 1 == intreg[1])
7430 return gen_rtx_REG (mode, intreg[0]);
7432 /* Otherwise figure out the entries of the PARALLEL. */
7433 for (i = 0; i < n; i++)
7435 int pos;
7437 switch (regclass[i])
7439 case X86_64_NO_CLASS:
7440 break;
7441 case X86_64_INTEGER_CLASS:
7442 case X86_64_INTEGERSI_CLASS:
7443 /* Merge TImodes on aligned occasions here too. */
7444 if (i * 8 + 8 > bytes)
7445 tmpmode
7446 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7447 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7448 tmpmode = SImode;
7449 else
7450 tmpmode = DImode;
7451 /* We've requested 24 bytes for which we
7452 don't have a mode. Use DImode. */
7453 if (tmpmode == BLKmode)
7454 tmpmode = DImode;
7455 exp [nexps++]
7456 = gen_rtx_EXPR_LIST (VOIDmode,
7457 gen_rtx_REG (tmpmode, *intreg),
7458 GEN_INT (i*8));
7459 intreg++;
7460 break;
7461 case X86_64_SSESF_CLASS:
7462 exp [nexps++]
7463 = gen_rtx_EXPR_LIST (VOIDmode,
7464 gen_rtx_REG (SFmode,
7465 SSE_REGNO (sse_regno)),
7466 GEN_INT (i*8));
7467 sse_regno++;
7468 break;
7469 case X86_64_SSEDF_CLASS:
7470 exp [nexps++]
7471 = gen_rtx_EXPR_LIST (VOIDmode,
7472 gen_rtx_REG (DFmode,
7473 SSE_REGNO (sse_regno)),
7474 GEN_INT (i*8));
7475 sse_regno++;
7476 break;
7477 case X86_64_SSE_CLASS:
7478 pos = i;
7479 switch (n)
7481 case 1:
7482 tmpmode = DImode;
7483 break;
7484 case 2:
7485 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7487 tmpmode = TImode;
7488 i++;
7490 else
7491 tmpmode = DImode;
7492 break;
7493 case 4:
7494 gcc_assert (i == 0
7495 && regclass[1] == X86_64_SSEUP_CLASS
7496 && regclass[2] == X86_64_SSEUP_CLASS
7497 && regclass[3] == X86_64_SSEUP_CLASS);
7498 tmpmode = OImode;
7499 i += 3;
7500 break;
7501 case 8:
7502 gcc_assert (i == 0
7503 && regclass[1] == X86_64_SSEUP_CLASS
7504 && regclass[2] == X86_64_SSEUP_CLASS
7505 && regclass[3] == X86_64_SSEUP_CLASS
7506 && regclass[4] == X86_64_SSEUP_CLASS
7507 && regclass[5] == X86_64_SSEUP_CLASS
7508 && regclass[6] == X86_64_SSEUP_CLASS
7509 && regclass[7] == X86_64_SSEUP_CLASS);
7510 tmpmode = XImode;
7511 i += 7;
7512 break;
7513 default:
7514 gcc_unreachable ();
7516 exp [nexps++]
7517 = gen_rtx_EXPR_LIST (VOIDmode,
7518 gen_rtx_REG (tmpmode,
7519 SSE_REGNO (sse_regno)),
7520 GEN_INT (pos*8));
7521 sse_regno++;
7522 break;
7523 default:
7524 gcc_unreachable ();
7528 /* Empty aligned struct, union or class. */
7529 if (nexps == 0)
7530 return NULL;
7532 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7533 for (i = 0; i < nexps; i++)
7534 XVECEXP (ret, 0, i) = exp [i];
7535 return ret;
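/* A sketch of the result for a mixed aggregate (the register numbers are
   only an example of the first free slots):

     struct m { long l; double d; };

   classifies as INTEGER + SSEDF, so construct_container builds roughly

     (parallel [(expr_list (reg:DI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte goes to an integer register and the second to an
   SSE register at offset 8.  */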
7538 /* Update the data in CUM to advance over an argument of mode MODE
7539 and data type TYPE. (TYPE is null for libcalls where that information
7540 may not be available.)
7542 Return the number of integer registers advanced over. */
7544 static int
7545 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7546 const_tree type, HOST_WIDE_INT bytes,
7547 HOST_WIDE_INT words)
7549 int res = 0;
7550 bool error_p = false;
7552 if (TARGET_IAMCU)
7554 /* Intel MCU psABI passes scalars and aggregates no larger than 8
7555 bytes in registers. */
7556 if (!VECTOR_MODE_P (mode) && bytes <= 8)
7557 goto pass_in_reg;
7558 return res;
7561 switch (mode)
7563 default:
7564 break;
7566 case BLKmode:
7567 if (bytes < 0)
7568 break;
7569 /* FALLTHRU */
7571 case DImode:
7572 case SImode:
7573 case HImode:
7574 case QImode:
7575 pass_in_reg:
7576 cum->words += words;
7577 cum->nregs -= words;
7578 cum->regno += words;
7579 if (cum->nregs >= 0)
7580 res = words;
7581 if (cum->nregs <= 0)
7583 cum->nregs = 0;
7584 cfun->machine->arg_reg_available = false;
7585 cum->regno = 0;
7587 break;
7589 case OImode:
7590 /* OImode shouldn't be used directly. */
7591 gcc_unreachable ();
7593 case DFmode:
7594 if (cum->float_in_sse == -1)
7595 error_p = 1;
7596 if (cum->float_in_sse < 2)
7597 break;
7598 case SFmode:
7599 if (cum->float_in_sse == -1)
7600 error_p = 1;
7601 if (cum->float_in_sse < 1)
7602 break;
7603 /* FALLTHRU */
7605 case V8SFmode:
7606 case V8SImode:
7607 case V64QImode:
7608 case V32HImode:
7609 case V16SImode:
7610 case V8DImode:
7611 case V16SFmode:
7612 case V8DFmode:
7613 case V32QImode:
7614 case V16HImode:
7615 case V4DFmode:
7616 case V4DImode:
7617 case TImode:
7618 case V16QImode:
7619 case V8HImode:
7620 case V4SImode:
7621 case V2DImode:
7622 case V4SFmode:
7623 case V2DFmode:
7624 if (!type || !AGGREGATE_TYPE_P (type))
7626 cum->sse_words += words;
7627 cum->sse_nregs -= 1;
7628 cum->sse_regno += 1;
7629 if (cum->sse_nregs <= 0)
7631 cum->sse_nregs = 0;
7632 cum->sse_regno = 0;
7635 break;
7637 case V8QImode:
7638 case V4HImode:
7639 case V2SImode:
7640 case V2SFmode:
7641 case V1TImode:
7642 case V1DImode:
7643 if (!type || !AGGREGATE_TYPE_P (type))
7645 cum->mmx_words += words;
7646 cum->mmx_nregs -= 1;
7647 cum->mmx_regno += 1;
7648 if (cum->mmx_nregs <= 0)
7650 cum->mmx_nregs = 0;
7651 cum->mmx_regno = 0;
7654 break;
7656 if (error_p)
7658 cum->float_in_sse = 0;
7659 error ("calling %qD with SSE calling convention without "
7660 "SSE/SSE2 enabled", cum->decl);
7661 sorry ("this is a GCC bug that can be worked around by adding "
7662 "attribute %<used%> to the called function");
7665 return res;
7668 static int
7669 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7670 const_tree type, HOST_WIDE_INT words, bool named)
7672 int int_nregs, sse_nregs;
7674 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7675 if (!named && (VALID_AVX512F_REG_MODE (mode)
7676 || VALID_AVX256_REG_MODE (mode)))
7677 return 0;
7679 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7680 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7682 cum->nregs -= int_nregs;
7683 cum->sse_nregs -= sse_nregs;
7684 cum->regno += int_nregs;
7685 cum->sse_regno += sse_nregs;
7686 return int_nregs;
7688 else
7690 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7691 cum->words = (cum->words + align - 1) & ~(align - 1);
7692 cum->words += words;
7693 return 0;
7697 static int
7698 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7699 HOST_WIDE_INT words)
7701 /* Otherwise, this should be passed indirectly. */
7702 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7704 cum->words += words;
7705 if (cum->nregs > 0)
7707 cum->nregs -= 1;
7708 cum->regno += 1;
7709 return 1;
7711 return 0;
7714 /* Update the data in CUM to advance over an argument of mode MODE and
7715 data type TYPE. (TYPE is null for libcalls where that information
7716 may not be available.) */
7718 static void
7719 ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7720 const_tree type, bool named)
7722 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7723 HOST_WIDE_INT bytes, words;
7724 int nregs;
7726 if (mode == BLKmode)
7727 bytes = int_size_in_bytes (type);
7728 else
7729 bytes = GET_MODE_SIZE (mode);
7730 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7732 if (type)
7733 mode = type_natural_mode (type, NULL, false);
7735 if ((type && POINTER_BOUNDS_TYPE_P (type))
7736 || POINTER_BOUNDS_MODE_P (mode))
7739 /* If we pass bounds in BT then just update the remaining bounds count. */
7739 if (cum->bnds_in_bt)
7741 cum->bnds_in_bt--;
7742 return;
7746 /* Update the remaining number of bounds to force. */
7746 if (cum->force_bnd_pass)
7747 cum->force_bnd_pass--;
7749 cum->bnd_regno++;
7751 return;
7754 /* The first arg not going to Bounds Tables resets this counter. */
7755 cum->bnds_in_bt = 0;
7756 /* For unnamed args we always pass bounds to avoid a bounds mess when
7757 the passed and received types do not match. If bounds do not follow an
7758 unnamed arg, still pretend the required number of bounds were passed. */
7759 if (cum->force_bnd_pass)
7761 cum->bnd_regno += cum->force_bnd_pass;
7762 cum->force_bnd_pass = 0;
7765 if (TARGET_64BIT)
7767 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
7769 if (call_abi == MS_ABI)
7770 nregs = function_arg_advance_ms_64 (cum, bytes, words);
7771 else
7772 nregs = function_arg_advance_64 (cum, mode, type, words, named);
7774 else
7775 nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7777 /* For stdarg we expect bounds to be passed for each value passed
7778 in register. */
7779 if (cum->stdarg)
7780 cum->force_bnd_pass = nregs;
7781 /* For pointers passed in memory we expect bounds to be passed in the
7782 Bounds Table. */
7783 if (!nregs)
7784 cum->bnds_in_bt = chkp_type_bounds_count (type);
7787 /* Define where to put the arguments to a function.
7788 Value is zero to push the argument on the stack,
7789 or a hard register in which to store the argument.
7791 MODE is the argument's machine mode.
7792 TYPE is the data type of the argument (as a tree).
7793 This is null for libcalls where that information may
7794 not be available.
7795 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7796 the preceding args and about the function being called.
7797 NAMED is nonzero if this argument is a named parameter
7798 (otherwise it is an extra parameter matching an ellipsis). */
7800 static rtx
7801 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7802 machine_mode orig_mode, const_tree type,
7803 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7805 bool error_p = false;
7806 /* Avoid the AL settings for the Unix64 ABI. */
7807 if (mode == VOIDmode)
7808 return constm1_rtx;
7810 if (TARGET_IAMCU)
7812 /* Intel MCU psABI passes scalars and aggregates no larger than 8
7813 bytes in registers. */
7814 if (!VECTOR_MODE_P (mode) && bytes <= 8)
7815 goto pass_in_reg;
7816 return NULL_RTX;
7819 switch (mode)
7821 default:
7822 break;
7824 case BLKmode:
7825 if (bytes < 0)
7826 break;
7827 /* FALLTHRU */
7828 case DImode:
7829 case SImode:
7830 case HImode:
7831 case QImode:
7832 pass_in_reg:
7833 if (words <= cum->nregs)
7835 int regno = cum->regno;
7837 /* Fastcall allocates the first two DWORD (SImode) or
7838 smaller arguments to ECX and EDX if the argument isn't
7839 an aggregate type. */
7840 if (cum->fastcall)
7842 if (mode == BLKmode
7843 || mode == DImode
7844 || (type && AGGREGATE_TYPE_P (type)))
7845 break;
7847 /* ECX, not EAX, is the first allocated register. */
7848 if (regno == AX_REG)
7849 regno = CX_REG;
7851 return gen_rtx_REG (mode, regno);
7853 break;
7855 case DFmode:
7856 if (cum->float_in_sse == -1)
7857 error_p = 1;
7858 if (cum->float_in_sse < 2)
7859 break;
7860 case SFmode:
7861 if (cum->float_in_sse == -1)
7862 error_p = 1;
7863 if (cum->float_in_sse < 1)
7864 break;
7865 /* FALLTHRU */
7866 case TImode:
7867 /* In 32bit, we pass TImode in xmm registers. */
7868 case V16QImode:
7869 case V8HImode:
7870 case V4SImode:
7871 case V2DImode:
7872 case V4SFmode:
7873 case V2DFmode:
7874 if (!type || !AGGREGATE_TYPE_P (type))
7876 if (cum->sse_nregs)
7877 return gen_reg_or_parallel (mode, orig_mode,
7878 cum->sse_regno + FIRST_SSE_REG);
7880 break;
7882 case OImode:
7883 case XImode:
7884 /* OImode and XImode shouldn't be used directly. */
7885 gcc_unreachable ();
7887 case V64QImode:
7888 case V32HImode:
7889 case V16SImode:
7890 case V8DImode:
7891 case V16SFmode:
7892 case V8DFmode:
7893 case V8SFmode:
7894 case V8SImode:
7895 case V32QImode:
7896 case V16HImode:
7897 case V4DFmode:
7898 case V4DImode:
7899 if (!type || !AGGREGATE_TYPE_P (type))
7901 if (cum->sse_nregs)
7902 return gen_reg_or_parallel (mode, orig_mode,
7903 cum->sse_regno + FIRST_SSE_REG);
7905 break;
7907 case V8QImode:
7908 case V4HImode:
7909 case V2SImode:
7910 case V2SFmode:
7911 case V1TImode:
7912 case V1DImode:
7913 if (!type || !AGGREGATE_TYPE_P (type))
7915 if (cum->mmx_nregs)
7916 return gen_reg_or_parallel (mode, orig_mode,
7917 cum->mmx_regno + FIRST_MMX_REG);
7919 break;
7921 if (error_p)
7923 cum->float_in_sse = 0;
7924 error ("calling %qD with SSE calling convention without "
7925 "SSE/SSE2 enabled", cum->decl);
7926 sorry ("this is a GCC bug that can be worked around by adding "
7927 "attribute %<used%> to the called function");
7930 return NULL_RTX;
7933 static rtx
7934 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7935 machine_mode orig_mode, const_tree type, bool named)
7937 /* Handle a hidden AL argument containing the number of registers
7938 for varargs x86-64 functions. */
7939 if (mode == VOIDmode)
7940 return GEN_INT (cum->maybe_vaarg
7941 ? (cum->sse_nregs < 0
7942 ? X86_64_SSE_REGPARM_MAX
7943 : cum->sse_regno)
7944 : -1);
7946 switch (mode)
7948 default:
7949 break;
7951 case V8SFmode:
7952 case V8SImode:
7953 case V32QImode:
7954 case V16HImode:
7955 case V4DFmode:
7956 case V4DImode:
7957 case V16SFmode:
7958 case V16SImode:
7959 case V64QImode:
7960 case V32HImode:
7961 case V8DFmode:
7962 case V8DImode:
7963 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7964 if (!named)
7965 return NULL;
7966 break;
7969 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7970 cum->sse_nregs,
7971 &x86_64_int_parameter_registers [cum->regno],
7972 cum->sse_regno);
7975 static rtx
7976 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7977 machine_mode orig_mode, bool named,
7978 HOST_WIDE_INT bytes)
7980 unsigned int regno;
7982 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7983 We use the value -2 to specify that the current function call is MS_ABI. */
7984 if (mode == VOIDmode)
7985 return GEN_INT (-2);
7987 /* If we've run out of registers, it goes on the stack. */
7988 if (cum->nregs == 0)
7989 return NULL_RTX;
7991 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7993 /* Only floating point modes are passed in anything but integer regs. */
7994 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7996 if (named)
7997 regno = cum->regno + FIRST_SSE_REG;
7998 else
8000 rtx t1, t2;
8002 /* Unnamed floating parameters are passed in both the
8003 SSE and integer registers. */
8004 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
8005 t2 = gen_rtx_REG (mode, regno);
8006 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
8007 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
8008 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
8011 /* Handle aggregate types passed in a register. */
8012 if (orig_mode == BLKmode)
8014 if (bytes > 0 && bytes <= 8)
8015 mode = (bytes > 4 ? DImode : SImode);
8016 if (mode == BLKmode)
8017 mode = DImode;
8020 return gen_reg_or_parallel (mode, orig_mode, regno);
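/* Illustrative summary of the MS_ABI convention implemented above: argument
   slots and registers correspond positionally, e.g.

     void f (int a, double b, int c, double d);
     //        a -> %ecx, b -> %xmm1, c -> %r8d, d -> %xmm3

   and an unnamed floating argument is described as a PARALLEL so that it is
   available in both its SSE and its integer register.  */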
8023 /* Return where to put the arguments to a function.
8024 Return zero to push the argument on the stack, or a hard register in which to store the argument.
8026 MODE is the argument's machine mode. TYPE is the data type of the
8027 argument. It is null for libcalls where that information may not be
8028 available. CUM gives information about the preceding args and about
8029 the function being called. NAMED is nonzero if this argument is a
8030 named parameter (otherwise it is an extra parameter matching an
8031 ellipsis). */
8033 static rtx
8034 ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
8035 const_tree type, bool named)
8037 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8038 machine_mode mode = omode;
8039 HOST_WIDE_INT bytes, words;
8040 rtx arg;
8042 /* All pointer bounds arguments are handled separately here. */
8043 if ((type && POINTER_BOUNDS_TYPE_P (type))
8044 || POINTER_BOUNDS_MODE_P (mode))
8046 /* Return NULL if bounds are forced to go in Bounds Table. */
8047 if (cum->bnds_in_bt)
8048 arg = NULL;
8049 /* Return the next available bound reg if any. */
8050 else if (cum->bnd_regno <= LAST_BND_REG)
8051 arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
8052 /* Return the next special slot number otherwise. */
8053 else
8054 arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
8056 return arg;
8059 if (mode == BLKmode)
8060 bytes = int_size_in_bytes (type);
8061 else
8062 bytes = GET_MODE_SIZE (mode);
8063 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8065 /* To simplify the code below, represent vector types with a vector mode
8066 even if MMX/SSE are not active. */
8067 if (type && TREE_CODE (type) == VECTOR_TYPE)
8068 mode = type_natural_mode (type, cum, false);
8070 if (TARGET_64BIT)
8072 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8074 if (call_abi == MS_ABI)
8075 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
8076 else
8077 arg = function_arg_64 (cum, mode, omode, type, named);
8079 else
8080 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
8082 return arg;
8085 /* A C expression that indicates when an argument must be passed by
8086 reference. If nonzero for an argument, a copy of that argument is
8087 made in memory and a pointer to the argument is passed instead of
8088 the argument itself. The pointer is passed in whatever way is
8089 appropriate for passing a pointer to that type. */
8091 static bool
8092 ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
8093 const_tree type, bool)
8095 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8097 /* Bounds are never passed by reference. */
8098 if ((type && POINTER_BOUNDS_TYPE_P (type))
8099 || POINTER_BOUNDS_MODE_P (mode))
8100 return false;
8102 if (TARGET_64BIT)
8104 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
8106 /* See Windows x64 Software Convention. */
8107 if (call_abi == MS_ABI)
8109 HOST_WIDE_INT msize = GET_MODE_SIZE (mode);
8111 if (type)
8113 /* Arrays are passed by reference. */
8114 if (TREE_CODE (type) == ARRAY_TYPE)
8115 return true;
8117 if (RECORD_OR_UNION_TYPE_P (type))
8119 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8120 are passed by reference. */
8121 msize = int_size_in_bytes (type);
8125 /* __m128 is passed by reference. */
8126 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
8128 else if (type && int_size_in_bytes (type) == -1)
8129 return true;
8132 return false;
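/* Example of the Windows x64 rule above (illustrative user code):

     struct small { long a; };      // 8 bytes: passed by value in a register
     struct big { long a, b, c; };  // 24 bytes: a hidden pointer to a
                                    // caller-made copy is passed instead

   __m128 values likewise fail the size test and are passed by reference.  */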
8135 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
8136 passing ABI. XXX: This function is obsolete and is only used for
8137 checking psABI compatibility with previous versions of GCC. */
8139 static bool
8140 ix86_compat_aligned_value_p (const_tree type)
8142 machine_mode mode = TYPE_MODE (type);
8143 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8144 || mode == TDmode
8145 || mode == TFmode
8146 || mode == TCmode)
8147 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8148 return true;
8149 if (TYPE_ALIGN (type) < 128)
8150 return false;
8152 if (AGGREGATE_TYPE_P (type))
8154 /* Walk the aggregates recursively. */
8155 switch (TREE_CODE (type))
8157 case RECORD_TYPE:
8158 case UNION_TYPE:
8159 case QUAL_UNION_TYPE:
8161 tree field;
8163 /* Walk all the structure fields. */
8164 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8166 if (TREE_CODE (field) == FIELD_DECL
8167 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8168 return true;
8170 break;
8173 case ARRAY_TYPE:
8174 /* Just in case some language passes arrays by value. */
8175 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8176 return true;
8177 break;
8179 default:
8180 gcc_unreachable ();
8183 return false;
8186 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8187 XXX: This function is obsolete and is only used for checking psABI
8188 compatibility with previous versions of GCC. */
8190 static unsigned int
8191 ix86_compat_function_arg_boundary (machine_mode mode,
8192 const_tree type, unsigned int align)
8194 /* In 32bit, only _Decimal128 and __float128 are aligned to their
8195 natural boundaries. */
8196 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8198 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
8199 make an exception for SSE modes since these require 128bit
8200 alignment.
8202 The handling here differs from field_alignment. ICC aligns MMX
8203 arguments to 4 byte boundaries, while structure fields are aligned
8204 to 8 byte boundaries. */
8205 if (!type)
8207 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8208 align = PARM_BOUNDARY;
8210 else
8212 if (!ix86_compat_aligned_value_p (type))
8213 align = PARM_BOUNDARY;
8216 if (align > BIGGEST_ALIGNMENT)
8217 align = BIGGEST_ALIGNMENT;
8218 return align;
8221 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
8222 passing ABI. */
8224 static bool
8225 ix86_contains_aligned_value_p (const_tree type)
8227 machine_mode mode = TYPE_MODE (type);
8229 if (mode == XFmode || mode == XCmode)
8230 return false;
8232 if (TYPE_ALIGN (type) < 128)
8233 return false;
8235 if (AGGREGATE_TYPE_P (type))
8237 /* Walk the aggregates recursively. */
8238 switch (TREE_CODE (type))
8240 case RECORD_TYPE:
8241 case UNION_TYPE:
8242 case QUAL_UNION_TYPE:
8244 tree field;
8246 /* Walk all the structure fields. */
8247 for (field = TYPE_FIELDS (type);
8248 field;
8249 field = DECL_CHAIN (field))
8251 if (TREE_CODE (field) == FIELD_DECL
8252 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8253 return true;
8255 break;
8258 case ARRAY_TYPE:
8259 /* Just in case some language passes arrays by value. */
8260 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8261 return true;
8262 break;
8264 default:
8265 gcc_unreachable ();
8268 else
8269 return TYPE_ALIGN (type) >= 128;
8271 return false;
8274 /* Gives the alignment boundary, in bits, of an argument with the
8275 specified mode and type. */
8277 static unsigned int
8278 ix86_function_arg_boundary (machine_mode mode, const_tree type)
8280 unsigned int align;
8281 if (type)
8283 /* Since the main variant type is used for the call, we convert TYPE
8284 to its main variant. */
8285 type = TYPE_MAIN_VARIANT (type);
8286 align = TYPE_ALIGN (type);
8288 else
8289 align = GET_MODE_ALIGNMENT (mode);
8290 if (align < PARM_BOUNDARY)
8291 align = PARM_BOUNDARY;
8292 else
8294 static bool warned;
8295 unsigned int saved_align = align;
8297 if (!TARGET_64BIT)
8299 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
8300 if (!type)
8302 if (mode == XFmode || mode == XCmode)
8303 align = PARM_BOUNDARY;
8305 else if (!ix86_contains_aligned_value_p (type))
8306 align = PARM_BOUNDARY;
8308 if (align < 128)
8309 align = PARM_BOUNDARY;
8312 if (warn_psabi
8313 && !warned
8314 && align != ix86_compat_function_arg_boundary (mode, type,
8315 saved_align))
8317 warned = true;
8318 inform (input_location,
8319 "The ABI for passing parameters with %d-byte"
8320 " alignment has changed in GCC 4.6",
8321 align / BITS_PER_UNIT);
8325 return align;
8328 /* Return true if N is a possible register number of function value. */
8330 static bool
8331 ix86_function_value_regno_p (const unsigned int regno)
8333 switch (regno)
8335 case AX_REG:
8336 return true;
8337 case DX_REG:
8338 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8339 case DI_REG:
8340 case SI_REG:
8341 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8343 case BND0_REG:
8344 case BND1_REG:
8345 return chkp_function_instrumented_p (current_function_decl);
8347 /* Complex values are returned in %st(0)/%st(1) pair. */
8348 case ST0_REG:
8349 case ST1_REG:
8350 /* TODO: The function should depend on current function ABI but
8351 builtins.c would need updating then. Therefore we use the
8352 default ABI. */
8353 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8354 return false;
8355 return TARGET_FLOAT_RETURNS_IN_80387;
8357 /* Complex values are returned in %xmm0/%xmm1 pair. */
8358 case XMM0_REG:
8359 case XMM1_REG:
8360 return TARGET_SSE;
8362 case MM0_REG:
8363 if (TARGET_MACHO || TARGET_64BIT)
8364 return false;
8365 return TARGET_MMX;
8368 return false;
8371 /* Define how to find the value returned by a function.
8372 VALTYPE is the data type of the value (as a tree).
8373 If the precise function being called is known, FUNC is its FUNCTION_DECL;
8374 otherwise, FUNC is 0. */
8376 static rtx
8377 function_value_32 (machine_mode orig_mode, machine_mode mode,
8378 const_tree fntype, const_tree fn)
8380 unsigned int regno;
8382 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8383 we normally prevent this case when mmx is not available. However
8384 some ABIs may require the result to be returned like DImode. */
8385 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8386 regno = FIRST_MMX_REG;
8388 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
8389 we prevent this case when sse is not available. However some ABIs
8390 may require the result to be returned like integer TImode. */
8391 else if (mode == TImode
8392 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8393 regno = FIRST_SSE_REG;
8395 /* 32-byte vector modes in %ymm0. */
8396 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8397 regno = FIRST_SSE_REG;
8399 /* 64-byte vector modes in %zmm0. */
8400 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8401 regno = FIRST_SSE_REG;
8403 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
8404 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8405 regno = FIRST_FLOAT_REG;
8406 else
8407 /* Most things go in %eax. */
8408 regno = AX_REG;
8410 /* Override FP return register with %xmm0 for local functions when
8411 SSE math is enabled or for functions with sseregparm attribute. */
8412 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8414 int sse_level = ix86_function_sseregparm (fntype, fn, false);
8415 if (sse_level == -1)
8417 error ("calling %qD with SSE calling convention without "
8418 "SSE/SSE2 enabled", fn);
8419 sorry ("this is a GCC bug that can be worked around by adding "
8420 "attribute %<used%> to the called function");
8422 else if ((sse_level >= 1 && mode == SFmode)
8423 || (sse_level == 2 && mode == DFmode))
8424 regno = FIRST_SSE_REG;
8427 /* OImode shouldn't be used directly. */
8428 gcc_assert (mode != OImode);
8430 return gen_rtx_REG (orig_mode, regno);
8433 static rtx
8434 function_value_64 (machine_mode orig_mode, machine_mode mode,
8435 const_tree valtype)
8437 rtx ret;
8439 /* Handle libcalls, which don't provide a type node. */
8440 if (valtype == NULL)
8442 unsigned int regno;
8444 switch (mode)
8446 case SFmode:
8447 case SCmode:
8448 case DFmode:
8449 case DCmode:
8450 case TFmode:
8451 case SDmode:
8452 case DDmode:
8453 case TDmode:
8454 regno = FIRST_SSE_REG;
8455 break;
8456 case XFmode:
8457 case XCmode:
8458 regno = FIRST_FLOAT_REG;
8459 break;
8460 case TCmode:
8461 return NULL;
8462 default:
8463 regno = AX_REG;
8466 return gen_rtx_REG (mode, regno);
8468 else if (POINTER_TYPE_P (valtype))
8470 /* Pointers are always returned in word_mode. */
8471 mode = word_mode;
8474 ret = construct_container (mode, orig_mode, valtype, 1,
8475 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8476 x86_64_int_return_registers, 0);
8478 /* For zero-sized structures, construct_container returns NULL, but we
8479 need to keep the rest of the compiler happy by returning a meaningful value. */
8480 if (!ret)
8481 ret = gen_rtx_REG (orig_mode, AX_REG);
8483 return ret;
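/* A rough sketch of the SysV x86-64 cases handled above: a libcall
   returning DFmode gets %xmm0, XFmode gets %st(0), and anything else
   defaults to %rax.  For a typed value such as

     struct { long l; double d; };

   construct_container classifies the two eightbytes separately and builds
   a PARALLEL pairing %rax with %xmm0, while a zero-sized structure falls
   back to a plain %rax REG.  */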
8486 static rtx
8487 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8488 const_tree valtype)
8490 unsigned int regno = AX_REG;
8492 if (TARGET_SSE)
8494 switch (GET_MODE_SIZE (mode))
8496 case 16:
8497 if (valtype != NULL_TREE
8498 && !VECTOR_INTEGER_TYPE_P (valtype)
8500 && !INTEGRAL_TYPE_P (valtype)
8501 && !VECTOR_FLOAT_TYPE_P (valtype))
8502 break;
8503 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8504 && !COMPLEX_MODE_P (mode))
8505 regno = FIRST_SSE_REG;
8506 break;
8507 case 8:
8508 case 4:
8509 if (mode == SFmode || mode == DFmode)
8510 regno = FIRST_SSE_REG;
8511 break;
8512 default:
8513 break;
8516 return gen_rtx_REG (orig_mode, regno);
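/* Roughly, per the checks above, under the 64-bit MS ABI this means:
   16-byte vectors such as __m128 and 128-bit integers come back in %xmm0
   when SSE is available, scalar float and double also in %xmm0, and
   everything else that fits in 8 bytes in %rax.  */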
8519 static rtx
8520 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8521 machine_mode orig_mode, machine_mode mode)
8523 const_tree fn, fntype;
8525 fn = NULL_TREE;
8526 if (fntype_or_decl && DECL_P (fntype_or_decl))
8527 fn = fntype_or_decl;
8528 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8530 if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8531 || POINTER_BOUNDS_MODE_P (mode))
8532 return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8533 else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8534 return function_value_ms_64 (orig_mode, mode, valtype);
8535 else if (TARGET_64BIT)
8536 return function_value_64 (orig_mode, mode, valtype);
8537 else
8538 return function_value_32 (orig_mode, mode, fntype, fn);
8541 static rtx
8542 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8544 machine_mode mode, orig_mode;
8546 orig_mode = TYPE_MODE (valtype);
8547 mode = type_natural_mode (valtype, NULL, true);
8548 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8551 /* Return an RTX representing a place where a function returns
8552 or receives pointer bounds or NULL if no bounds are returned.
8554 VALTYPE is a data type of a value returned by the function.
8556 FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8557 or FUNCTION_TYPE of the function.
8559 If OUTGOING is false, return a place in which the caller will
8560 see the return value. Otherwise, return a place where a
8561 function returns a value. */
8563 static rtx
8564 ix86_function_value_bounds (const_tree valtype,
8565 const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8566 bool outgoing ATTRIBUTE_UNUSED)
8568 rtx res = NULL_RTX;
8570 if (BOUNDED_TYPE_P (valtype))
8571 res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8572 else if (chkp_type_has_pointer (valtype))
8574 bitmap slots;
8575 rtx bounds[2];
8576 bitmap_iterator bi;
8577 unsigned i, bnd_no = 0;
8579 bitmap_obstack_initialize (NULL);
8580 slots = BITMAP_ALLOC (NULL);
8581 chkp_find_bound_slots (valtype, slots);
8583 EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8585 rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8586 rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8587 gcc_assert (bnd_no < 2);
8588 bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8591 res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8593 BITMAP_FREE (slots);
8594 bitmap_obstack_release (NULL);
8596 else
8597 res = NULL_RTX;
8599 return res;
8602 /* Pointer function arguments and return values are promoted to
8603 word_mode. */
8605 static machine_mode
8606 ix86_promote_function_mode (const_tree type, machine_mode mode,
8607 int *punsignedp, const_tree fntype,
8608 int for_return)
8610 if (type != NULL_TREE && POINTER_TYPE_P (type))
8612 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8613 return word_mode;
8615 return default_promote_function_mode (type, mode, punsignedp, fntype,
8616 for_return);
8619 /* Return true if a structure, union or array with MODE containing FIELD
8620 should be accessed using BLKmode. */
8622 static bool
8623 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8625 /* Union with XFmode must be in BLKmode. */
8626 return (mode == XFmode
8627 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8628 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8632 ix86_libcall_value (machine_mode mode)
8634 return ix86_function_value_1 (NULL, NULL, mode, mode);
8637 /* Return true iff type is returned in memory. */
8639 static bool
8640 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8642 #ifdef SUBTARGET_RETURN_IN_MEMORY
8643 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8644 #else
8645 const machine_mode mode = type_natural_mode (type, NULL, true);
8646 HOST_WIDE_INT size;
8648 if (POINTER_BOUNDS_TYPE_P (type))
8649 return false;
8651 if (TARGET_64BIT)
8653 if (ix86_function_type_abi (fntype) == MS_ABI)
8655 size = int_size_in_bytes (type);
8657 /* __m128 is returned in xmm0. */
8658 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8659 || INTEGRAL_TYPE_P (type)
8660 || VECTOR_FLOAT_TYPE_P (type))
8661 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8662 && !COMPLEX_MODE_P (mode)
8663 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8664 return false;
8666 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8667 return size != 1 && size != 2 && size != 4 && size != 8;
8669 else
8671 int needed_intregs, needed_sseregs;
8673 return examine_argument (mode, type, 1,
8674 &needed_intregs, &needed_sseregs);
8677 else
8679 size = int_size_in_bytes (type);
8681 /* Intel MCU psABI returns scalars and aggregates no larger than 8
8682 bytes in registers. */
8683 if (TARGET_IAMCU)
8684 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
8686 if (mode == BLKmode)
8687 return true;
8689 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8690 return false;
8692 if (VECTOR_MODE_P (mode) || mode == TImode)
8694 /* User-created vectors small enough to fit in EAX. */
8695 if (size < 8)
8696 return false;
8698 /* Unless the ABI prescribes otherwise,
8699 MMX/3dNow values are returned in MM0 if available. */
8701 if (size == 8)
8702 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8704 /* SSE values are returned in XMM0 if available. */
8705 if (size == 16)
8706 return !TARGET_SSE;
8708 /* AVX values are returned in YMM0 if available. */
8709 if (size == 32)
8710 return !TARGET_AVX;
8712 /* AVX512F values are returned in ZMM0 if available. */
8713 if (size == 64)
8714 return !TARGET_AVX512F;
8717 if (mode == XFmode)
8718 return false;
8720 if (size > 12)
8721 return true;
8723 /* OImode shouldn't be used directly. */
8724 gcc_assert (mode != OImode);
8726 return false;
8728 #endif
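/* A few concrete consequences of the checks above, as a sketch: under the
   64-bit MS ABI a struct of 3 bytes is returned in memory while one of 1,
   2, 4 or 8 bytes (or an __m128) is not; on 32-bit targets a long double
   (XFmode) is never returned in memory, aggregates larger than 12 bytes
   typically are, and an 8-byte vector stays in %mm0 only when MMX is
   enabled and the ABI does not say otherwise.  */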
8732 /* Create the va_list data type. */
8734 static tree
8735 ix86_build_builtin_va_list_64 (void)
8737 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8739 record = lang_hooks.types.make_type (RECORD_TYPE);
8740 type_decl = build_decl (BUILTINS_LOCATION,
8741 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8743 f_gpr = build_decl (BUILTINS_LOCATION,
8744 FIELD_DECL, get_identifier ("gp_offset"),
8745 unsigned_type_node);
8746 f_fpr = build_decl (BUILTINS_LOCATION,
8747 FIELD_DECL, get_identifier ("fp_offset"),
8748 unsigned_type_node);
8749 f_ovf = build_decl (BUILTINS_LOCATION,
8750 FIELD_DECL, get_identifier ("overflow_arg_area"),
8751 ptr_type_node);
8752 f_sav = build_decl (BUILTINS_LOCATION,
8753 FIELD_DECL, get_identifier ("reg_save_area"),
8754 ptr_type_node);
8756 va_list_gpr_counter_field = f_gpr;
8757 va_list_fpr_counter_field = f_fpr;
8759 DECL_FIELD_CONTEXT (f_gpr) = record;
8760 DECL_FIELD_CONTEXT (f_fpr) = record;
8761 DECL_FIELD_CONTEXT (f_ovf) = record;
8762 DECL_FIELD_CONTEXT (f_sav) = record;
8764 TYPE_STUB_DECL (record) = type_decl;
8765 TYPE_NAME (record) = type_decl;
8766 TYPE_FIELDS (record) = f_gpr;
8767 DECL_CHAIN (f_gpr) = f_fpr;
8768 DECL_CHAIN (f_fpr) = f_ovf;
8769 DECL_CHAIN (f_ovf) = f_sav;
8771 layout_type (record);
8773 /* The correct type is an array type of one element. */
8774 return build_array_type (record, build_index_type (size_zero_node));
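/* For reference, the record built above corresponds roughly to the
   declaration mandated by the SysV x86-64 psABI:

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */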
8777 /* Set up the builtin va_list data type and, for 64-bit, the additional
8778 calling-convention-specific va_list data types. */
8780 static tree
8781 ix86_build_builtin_va_list (void)
8783 if (TARGET_64BIT)
8785 /* Initialize ABI specific va_list builtin types. */
8786 tree sysv_va_list, ms_va_list;
8788 sysv_va_list = ix86_build_builtin_va_list_64 ();
8789 sysv_va_list_type_node = build_variant_type_copy (sysv_va_list);
8791 /* For MS_ABI we use plain pointer to argument area. */
8792 ms_va_list = build_pointer_type (char_type_node);
8793 ms_va_list_type_node = build_variant_type_copy (ms_va_list);
8795 return (ix86_abi == MS_ABI) ? ms_va_list : sysv_va_list;
8797 else
8799 /* For i386 we use plain pointer to argument area. */
8800 return build_pointer_type (char_type_node);
8804 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8806 static void
8807 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8809 rtx save_area, mem;
8810 alias_set_type set;
8811 int i, max;
8813 /* GPR size of varargs save area. */
8814 if (cfun->va_list_gpr_size)
8815 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8816 else
8817 ix86_varargs_gpr_size = 0;
8819 /* FPR size of varargs save area. We don't need it if we don't pass
8820 anything in SSE registers. */
8821 if (TARGET_SSE && cfun->va_list_fpr_size)
8822 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8823 else
8824 ix86_varargs_fpr_size = 0;
8826 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8827 return;
8829 save_area = frame_pointer_rtx;
8830 set = get_varargs_alias_set ();
8832 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8833 if (max > X86_64_REGPARM_MAX)
8834 max = X86_64_REGPARM_MAX;
8836 for (i = cum->regno; i < max; i++)
8838 mem = gen_rtx_MEM (word_mode,
8839 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8840 MEM_NOTRAP_P (mem) = 1;
8841 set_mem_alias_set (mem, set);
8842 emit_move_insn (mem,
8843 gen_rtx_REG (word_mode,
8844 x86_64_int_parameter_registers[i]));
8847 if (ix86_varargs_fpr_size)
8849 machine_mode smode;
8850 rtx_code_label *label;
8851 rtx test;
8853 /* Now emit code to save SSE registers.  The AX parameter contains the number
8854 of SSE parameter registers used to call this function, though all we
8855 actually check here is the zero/non-zero status. */
8857 label = gen_label_rtx ();
8858 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8859 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8860 label));
8862 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8863 we used movdqa (i.e. TImode) instead? Perhaps even better would
8864 be if we could determine the real mode of the data, via a hook
8865 into pass_stdarg. Ignore all that for now. */
8866 smode = V4SFmode;
8867 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8868 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8870 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8871 if (max > X86_64_SSE_REGPARM_MAX)
8872 max = X86_64_SSE_REGPARM_MAX;
8874 for (i = cum->sse_regno; i < max; ++i)
8876 mem = plus_constant (Pmode, save_area,
8877 i * 16 + ix86_varargs_gpr_size);
8878 mem = gen_rtx_MEM (smode, mem);
8879 MEM_NOTRAP_P (mem) = 1;
8880 set_mem_alias_set (mem, set);
8881 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8883 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8886 emit_label (label);
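/* With the default parameter-register counts this lays out the register
   save area roughly as (byte offsets within the save area):

     0 .. 47     %rdi, %rsi, %rdx, %rcx, %r8, %r9   (6 * 8)
     48 .. 175   %xmm0 .. %xmm7                     (8 * 16)

   and the SSE half is skipped at run time when the caller passed zero in
   %al.  */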
8890 static void
8891 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8893 alias_set_type set = get_varargs_alias_set ();
8894 int i;
8896 /* Reset to zero, as there might be a sysv va_arg used
8897 before. */
8898 ix86_varargs_gpr_size = 0;
8899 ix86_varargs_fpr_size = 0;
8901 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8903 rtx reg, mem;
8905 mem = gen_rtx_MEM (Pmode,
8906 plus_constant (Pmode, virtual_incoming_args_rtx,
8907 i * UNITS_PER_WORD));
8908 MEM_NOTRAP_P (mem) = 1;
8909 set_mem_alias_set (mem, set);
8911 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8912 emit_move_insn (mem, reg);
8916 static void
8917 ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8918 tree type, int *, int no_rtl)
8920 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8921 CUMULATIVE_ARGS next_cum;
8922 tree fntype;
8924 /* This argument doesn't appear to be used anymore, which is good,
8925 because the old code here didn't suppress rtl generation. */
8926 gcc_assert (!no_rtl);
8928 if (!TARGET_64BIT)
8929 return;
8931 fntype = TREE_TYPE (current_function_decl);
8933 /* For varargs, we do not want to skip the dummy va_dcl argument.
8934 For stdargs, we do want to skip the last named argument. */
8935 next_cum = *cum;
8936 if (stdarg_p (fntype))
8937 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8938 true);
8940 if (cum->call_abi == MS_ABI)
8941 setup_incoming_varargs_ms_64 (&next_cum);
8942 else
8943 setup_incoming_varargs_64 (&next_cum);
8946 static void
8947 ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8948 enum machine_mode mode,
8949 tree type,
8950 int *pretend_size ATTRIBUTE_UNUSED,
8951 int no_rtl)
8953 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8954 CUMULATIVE_ARGS next_cum;
8955 tree fntype;
8956 rtx save_area;
8957 int bnd_reg, i, max;
8959 gcc_assert (!no_rtl);
8961 /* Do nothing if we use plain pointer to argument area. */
8962 if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8963 return;
8965 fntype = TREE_TYPE (current_function_decl);
8967 /* For varargs, we do not want to skip the dummy va_dcl argument.
8968 For stdargs, we do want to skip the last named argument. */
8969 next_cum = *cum;
8970 if (stdarg_p (fntype))
8971 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8972 true);
8973 save_area = frame_pointer_rtx;
8975 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8976 if (max > X86_64_REGPARM_MAX)
8977 max = X86_64_REGPARM_MAX;
8979 bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8980 if (chkp_function_instrumented_p (current_function_decl))
8981 for (i = cum->regno; i < max; i++)
8983 rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8984 rtx ptr = gen_rtx_REG (Pmode,
8985 x86_64_int_parameter_registers[i]);
8986 rtx bounds;
8988 if (bnd_reg <= LAST_BND_REG)
8989 bounds = gen_rtx_REG (BNDmode, bnd_reg);
8990 else
8992 rtx ldx_addr =
8993 plus_constant (Pmode, arg_pointer_rtx,
8994 (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8995 bounds = gen_reg_rtx (BNDmode);
8996 emit_insn (BNDmode == BND64mode
8997 ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8998 : gen_bnd32_ldx (bounds, ldx_addr, ptr));
9001 emit_insn (BNDmode == BND64mode
9002 ? gen_bnd64_stx (addr, ptr, bounds)
9003 : gen_bnd32_stx (addr, ptr, bounds));
9005 bnd_reg++;
9010 /* Check whether TYPE is the char * kind of va_list. */
9012 static bool
9013 is_va_list_char_pointer (tree type)
9015 tree canonic;
9017 /* For 32-bit it is always true. */
9018 if (!TARGET_64BIT)
9019 return true;
9020 canonic = ix86_canonical_va_list_type (type);
9021 return (canonic == ms_va_list_type_node
9022 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
9025 /* Implement va_start. */
9027 static void
9028 ix86_va_start (tree valist, rtx nextarg)
9030 HOST_WIDE_INT words, n_gpr, n_fpr;
9031 tree f_gpr, f_fpr, f_ovf, f_sav;
9032 tree gpr, fpr, ovf, sav, t;
9033 tree type;
9034 rtx ovf_rtx;
9036 if (flag_split_stack
9037 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9039 unsigned int scratch_regno;
9041 /* When we are splitting the stack, we can't refer to the stack
9042 arguments using internal_arg_pointer, because they may be on
9043 the old stack. The split stack prologue will arrange to
9044 leave a pointer to the old stack arguments in a scratch
9045 register, which we here copy to a pseudo-register. The split
9046 stack prologue can't set the pseudo-register directly because
9047 it (the prologue) runs before any registers have been saved. */
9049 scratch_regno = split_stack_prologue_scratch_regno ();
9050 if (scratch_regno != INVALID_REGNUM)
9052 rtx reg;
9053 rtx_insn *seq;
9055 reg = gen_reg_rtx (Pmode);
9056 cfun->machine->split_stack_varargs_pointer = reg;
9058 start_sequence ();
9059 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
9060 seq = get_insns ();
9061 end_sequence ();
9063 push_topmost_sequence ();
9064 emit_insn_after (seq, entry_of_function ());
9065 pop_topmost_sequence ();
9069 /* Only 64bit target needs something special. */
9070 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9072 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9073 std_expand_builtin_va_start (valist, nextarg);
9074 else
9076 rtx va_r, next;
9078 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
9079 next = expand_binop (ptr_mode, add_optab,
9080 cfun->machine->split_stack_varargs_pointer,
9081 crtl->args.arg_offset_rtx,
9082 NULL_RTX, 0, OPTAB_LIB_WIDEN);
9083 convert_move (va_r, next, 0);
9085 /* Store zero bounds for va_list. */
9086 if (chkp_function_instrumented_p (current_function_decl))
9087 chkp_expand_bounds_reset_for_mem (valist,
9088 make_tree (TREE_TYPE (valist),
9089 next));
9092 return;
9095 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9096 f_fpr = DECL_CHAIN (f_gpr);
9097 f_ovf = DECL_CHAIN (f_fpr);
9098 f_sav = DECL_CHAIN (f_ovf);
9100 valist = build_simple_mem_ref (valist);
9101 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9102 /* The following should be folded into the MEM_REF offset. */
9103 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9104 f_gpr, NULL_TREE);
9105 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9106 f_fpr, NULL_TREE);
9107 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9108 f_ovf, NULL_TREE);
9109 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9110 f_sav, NULL_TREE);
9112 /* Count number of gp and fp argument registers used. */
9113 words = crtl->args.info.words;
9114 n_gpr = crtl->args.info.regno;
9115 n_fpr = crtl->args.info.sse_regno;
9117 if (cfun->va_list_gpr_size)
9119 type = TREE_TYPE (gpr);
9120 t = build2 (MODIFY_EXPR, type,
9121 gpr, build_int_cst (type, n_gpr * 8));
9122 TREE_SIDE_EFFECTS (t) = 1;
9123 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9126 if (TARGET_SSE && cfun->va_list_fpr_size)
9128 type = TREE_TYPE (fpr);
9129 t = build2 (MODIFY_EXPR, type, fpr,
9130 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9131 TREE_SIDE_EFFECTS (t) = 1;
9132 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9135 /* Find the overflow area. */
9136 type = TREE_TYPE (ovf);
9137 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9138 ovf_rtx = crtl->args.internal_arg_pointer;
9139 else
9140 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9141 t = make_tree (type, ovf_rtx);
9142 if (words != 0)
9143 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9145 /* Store zero bounds for overflow area pointer. */
9146 if (chkp_function_instrumented_p (current_function_decl))
9147 chkp_expand_bounds_reset_for_mem (ovf, t);
9149 t = build2 (MODIFY_EXPR, type, ovf, t);
9150 TREE_SIDE_EFFECTS (t) = 1;
9151 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9153 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9155 /* Find the register save area.
9156 The function prologue saves it right above the stack frame. */
9157 type = TREE_TYPE (sav);
9158 t = make_tree (type, frame_pointer_rtx);
9159 if (!ix86_varargs_gpr_size)
9160 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9162 /* Store zero bounds for save area pointer. */
9163 if (chkp_function_instrumented_p (current_function_decl))
9164 chkp_expand_bounds_reset_for_mem (sav, t);
9166 t = build2 (MODIFY_EXPR, type, sav, t);
9167 TREE_SIDE_EFFECTS (t) = 1;
9168 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
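/* As an illustrative sketch (assuming the default register counts), in a
   function whose named arguments consumed two integer registers and one
   SSE register, the expansion above amounts to:

     ap->gp_offset = 2 * 8;                    16
     ap->fp_offset = 48 + 1 * 16;              64
     ap->overflow_arg_area = <first stack-passed argument>;
     ap->reg_save_area = <area saved by the prologue>;  */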
9172 /* Implement va_arg. */
9174 static tree
9175 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9176 gimple_seq *post_p)
9178 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9179 tree f_gpr, f_fpr, f_ovf, f_sav;
9180 tree gpr, fpr, ovf, sav, t;
9181 int size, rsize;
9182 tree lab_false, lab_over = NULL_TREE;
9183 tree addr, t2;
9184 rtx container;
9185 int indirect_p = 0;
9186 tree ptrtype;
9187 machine_mode nat_mode;
9188 unsigned int arg_boundary;
9190 /* Only 64bit target needs something special. */
9191 if (is_va_list_char_pointer (TREE_TYPE (valist)))
9192 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9194 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9195 f_fpr = DECL_CHAIN (f_gpr);
9196 f_ovf = DECL_CHAIN (f_fpr);
9197 f_sav = DECL_CHAIN (f_ovf);
9199 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9200 valist, f_gpr, NULL_TREE);
9202 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9203 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9204 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9206 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9207 if (indirect_p)
9208 type = build_pointer_type (type);
9209 size = int_size_in_bytes (type);
9210 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9212 nat_mode = type_natural_mode (type, NULL, false);
9213 switch (nat_mode)
9215 case V8SFmode:
9216 case V8SImode:
9217 case V32QImode:
9218 case V16HImode:
9219 case V4DFmode:
9220 case V4DImode:
9221 case V16SFmode:
9222 case V16SImode:
9223 case V64QImode:
9224 case V32HImode:
9225 case V8DFmode:
9226 case V8DImode:
9227 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
9228 if (!TARGET_64BIT_MS_ABI)
9230 container = NULL;
9231 break;
9234 default:
9235 container = construct_container (nat_mode, TYPE_MODE (type),
9236 type, 0, X86_64_REGPARM_MAX,
9237 X86_64_SSE_REGPARM_MAX, intreg,
9239 break;
9242 /* Pull the value out of the saved registers. */
9244 addr = create_tmp_var (ptr_type_node, "addr");
9246 if (container)
9248 int needed_intregs, needed_sseregs;
9249 bool need_temp;
9250 tree int_addr, sse_addr;
9252 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9253 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9255 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9257 need_temp = (!REG_P (container)
9258 && ((needed_intregs && TYPE_ALIGN (type) > 64)
9259 || TYPE_ALIGN (type) > 128));
9261 /* If we are passing a structure, verify that it is a consecutive block
9262 in the register save area.  If not, we need to do moves. */
9263 if (!need_temp && !REG_P (container))
9265 /* Verify that all registers are strictly consecutive */
9266 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9268 int i;
9270 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9272 rtx slot = XVECEXP (container, 0, i);
9273 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9274 || INTVAL (XEXP (slot, 1)) != i * 16)
9275 need_temp = true;
9278 else
9280 int i;
9282 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9284 rtx slot = XVECEXP (container, 0, i);
9285 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9286 || INTVAL (XEXP (slot, 1)) != i * 8)
9287 need_temp = true;
9291 if (!need_temp)
9293 int_addr = addr;
9294 sse_addr = addr;
9296 else
9298 int_addr = create_tmp_var (ptr_type_node, "int_addr");
9299 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9302 /* First ensure that we fit completely in registers. */
9303 if (needed_intregs)
9305 t = build_int_cst (TREE_TYPE (gpr),
9306 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9307 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9308 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9309 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9310 gimplify_and_add (t, pre_p);
9312 if (needed_sseregs)
9314 t = build_int_cst (TREE_TYPE (fpr),
9315 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9316 + X86_64_REGPARM_MAX * 8);
9317 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9318 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9319 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9320 gimplify_and_add (t, pre_p);
9323 /* Compute index to start of area used for integer regs. */
9324 if (needed_intregs)
9326 /* int_addr = gpr + sav; */
9327 t = fold_build_pointer_plus (sav, gpr);
9328 gimplify_assign (int_addr, t, pre_p);
9330 if (needed_sseregs)
9332 /* sse_addr = fpr + sav; */
9333 t = fold_build_pointer_plus (sav, fpr);
9334 gimplify_assign (sse_addr, t, pre_p);
9336 if (need_temp)
9338 int i, prev_size = 0;
9339 tree temp = create_tmp_var (type, "va_arg_tmp");
9341 /* addr = &temp; */
9342 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9343 gimplify_assign (addr, t, pre_p);
9345 for (i = 0; i < XVECLEN (container, 0); i++)
9347 rtx slot = XVECEXP (container, 0, i);
9348 rtx reg = XEXP (slot, 0);
9349 machine_mode mode = GET_MODE (reg);
9350 tree piece_type;
9351 tree addr_type;
9352 tree daddr_type;
9353 tree src_addr, src;
9354 int src_offset;
9355 tree dest_addr, dest;
9356 int cur_size = GET_MODE_SIZE (mode);
9358 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9359 prev_size = INTVAL (XEXP (slot, 1));
9360 if (prev_size + cur_size > size)
9362 cur_size = size - prev_size;
9363 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9364 if (mode == BLKmode)
9365 mode = QImode;
9367 piece_type = lang_hooks.types.type_for_mode (mode, 1);
9368 if (mode == GET_MODE (reg))
9369 addr_type = build_pointer_type (piece_type);
9370 else
9371 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9372 true);
9373 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9374 true);
9376 if (SSE_REGNO_P (REGNO (reg)))
9378 src_addr = sse_addr;
9379 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9381 else
9383 src_addr = int_addr;
9384 src_offset = REGNO (reg) * 8;
9386 src_addr = fold_convert (addr_type, src_addr);
9387 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9389 dest_addr = fold_convert (daddr_type, addr);
9390 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9391 if (cur_size == GET_MODE_SIZE (mode))
9393 src = build_va_arg_indirect_ref (src_addr);
9394 dest = build_va_arg_indirect_ref (dest_addr);
9396 gimplify_assign (dest, src, pre_p);
9398 else
9400 tree copy
9401 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9402 3, dest_addr, src_addr,
9403 size_int (cur_size));
9404 gimplify_and_add (copy, pre_p);
9406 prev_size += cur_size;
9410 if (needed_intregs)
9412 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9413 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9414 gimplify_assign (gpr, t, pre_p);
9417 if (needed_sseregs)
9419 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9420 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9421 gimplify_assign (unshare_expr (fpr), t, pre_p);
9424 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9426 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9429 /* ... otherwise out of the overflow area. */
9431 /* When we align a parameter on the stack for the caller, if its
9432 alignment exceeds MAX_SUPPORTED_STACK_ALIGNMENT, it will be
9433 aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee here
9434 with the caller. */
9435 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9436 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9437 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9439 /* Care for on-stack alignment if needed. */
9440 if (arg_boundary <= 64 || size == 0)
9441 t = ovf;
9442 else
9444 HOST_WIDE_INT align = arg_boundary / 8;
9445 t = fold_build_pointer_plus_hwi (ovf, align - 1);
9446 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9447 build_int_cst (TREE_TYPE (t), -align));
9450 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9451 gimplify_assign (addr, t, pre_p);
9453 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9454 gimplify_assign (unshare_expr (ovf), t, pre_p);
9456 if (container)
9457 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9459 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9460 addr = fold_convert (ptrtype, addr);
9462 if (indirect_p)
9463 addr = build_va_arg_indirect_ref (addr);
9464 return build_va_arg_indirect_ref (addr);
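/* As a sketch, the GIMPLE produced above for va_arg (ap, int) behaves
   roughly like:

     if (ap->gp_offset >= 6 * 8)
       {
         addr = ap->overflow_arg_area;
         ap->overflow_arg_area += 8;
       }
     else
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }
     result = *(int *) addr;

   with extra copies through a temporary when an aggregate's registers are
   not consecutive in the save area.  */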
9467 /* Return true if OPNUM's MEM should be matched
9468 in movabs* patterns. */
9470 bool
9471 ix86_check_movabs (rtx insn, int opnum)
9473 rtx set, mem;
9475 set = PATTERN (insn);
9476 if (GET_CODE (set) == PARALLEL)
9477 set = XVECEXP (set, 0, 0);
9478 gcc_assert (GET_CODE (set) == SET);
9479 mem = XEXP (set, opnum);
9480 while (GET_CODE (mem) == SUBREG)
9481 mem = SUBREG_REG (mem);
9482 gcc_assert (MEM_P (mem));
9483 return volatile_ok || !MEM_VOLATILE_P (mem);
9486 /* Initialize the table of extra 80387 mathematical constants. */
9488 static void
9489 init_ext_80387_constants (void)
9491 static const char * cst[5] =
9493 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
9494 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
9495 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
9496 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
9497 "3.1415926535897932385128089594061862044", /* 4: fldpi */
9499 int i;
9501 for (i = 0; i < 5; i++)
9503 real_from_string (&ext_80387_constants_table[i], cst[i]);
9504 /* Ensure each constant is rounded to XFmode precision. */
9505 real_convert (&ext_80387_constants_table[i],
9506 XFmode, &ext_80387_constants_table[i]);
9509 ext_80387_constants_init = 1;
9512 /* Return non-zero if the constant is something that
9513 can be loaded with a special instruction. */
9516 standard_80387_constant_p (rtx x)
9518 machine_mode mode = GET_MODE (x);
9520 REAL_VALUE_TYPE r;
9522 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
9523 return -1;
9525 if (x == CONST0_RTX (mode))
9526 return 1;
9527 if (x == CONST1_RTX (mode))
9528 return 2;
9530 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9532 /* For XFmode constants, try to find a special 80387 instruction when
9533 optimizing for size or on those CPUs that benefit from them. */
9534 if (mode == XFmode
9535 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9537 int i;
9539 if (! ext_80387_constants_init)
9540 init_ext_80387_constants ();
9542 for (i = 0; i < 5; i++)
9543 if (real_identical (&r, &ext_80387_constants_table[i]))
9544 return i + 3;
9547 /* Load of the constant -0.0 or -1.0 will be split as
9548 fldz;fchs or fld1;fchs sequence. */
9549 if (real_isnegzero (&r))
9550 return 8;
9551 if (real_identical (&r, &dconstm1))
9552 return 9;
9554 return 0;
9557 /* Return the opcode of the special instruction to be used to load
9558 the constant X. */
9560 const char *
9561 standard_80387_constant_opcode (rtx x)
9563 switch (standard_80387_constant_p (x))
9565 case 1:
9566 return "fldz";
9567 case 2:
9568 return "fld1";
9569 case 3:
9570 return "fldlg2";
9571 case 4:
9572 return "fldln2";
9573 case 5:
9574 return "fldl2e";
9575 case 6:
9576 return "fldl2t";
9577 case 7:
9578 return "fldpi";
9579 case 8:
9580 case 9:
9581 return "#";
9582 default:
9583 gcc_unreachable ();
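/* For example, moving the XFmode constant 0.0 into an x87 register emits
   fldz and 1.0 emits fld1, while -0.0 and -1.0 return "#" here and are
   later split into fldz;fchs and fld1;fchs respectively.  The remaining
   entries map to fldlg2, fldln2, fldl2e, fldl2t and fldpi.  */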
9587 /* Return the CONST_DOUBLE representing the 80387 constant that is
9588 loaded by the specified special instruction. The argument IDX
9589 matches the return value from standard_80387_constant_p. */
9592 standard_80387_constant_rtx (int idx)
9594 int i;
9596 if (! ext_80387_constants_init)
9597 init_ext_80387_constants ();
9599 switch (idx)
9601 case 3:
9602 case 4:
9603 case 5:
9604 case 6:
9605 case 7:
9606 i = idx - 3;
9607 break;
9609 default:
9610 gcc_unreachable ();
9613 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9614 XFmode);
9617 /* Return 1 if X is all zeros and 2 if X is all ones,
9618 in a supported SSE/AVX vector mode. */
9621 standard_sse_constant_p (rtx x)
9623 machine_mode mode;
9625 if (!TARGET_SSE)
9626 return 0;
9628 mode = GET_MODE (x);
9630 if (x == const0_rtx || x == CONST0_RTX (mode))
9631 return 1;
9632 if (vector_all_ones_operand (x, mode))
9633 switch (mode)
9635 case V16QImode:
9636 case V8HImode:
9637 case V4SImode:
9638 case V2DImode:
9639 if (TARGET_SSE2)
9640 return 2;
9641 case V32QImode:
9642 case V16HImode:
9643 case V8SImode:
9644 case V4DImode:
9645 if (TARGET_AVX2)
9646 return 2;
9647 case V64QImode:
9648 case V32HImode:
9649 case V16SImode:
9650 case V8DImode:
9651 if (TARGET_AVX512F)
9652 return 2;
9653 default:
9654 break;
9657 return 0;
9660 /* Return the opcode of the special instruction to be used to load
9661 the constant X. */
9663 const char *
9664 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9666 switch (standard_sse_constant_p (x))
9668 case 1:
9669 switch (get_attr_mode (insn))
9671 case MODE_XI:
9672 return "vpxord\t%g0, %g0, %g0";
9673 case MODE_V16SF:
9674 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9675 : "vpxord\t%g0, %g0, %g0";
9676 case MODE_V8DF:
9677 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9678 : "vpxorq\t%g0, %g0, %g0";
9679 case MODE_TI:
9680 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9681 : "%vpxor\t%0, %d0";
9682 case MODE_V2DF:
9683 return "%vxorpd\t%0, %d0";
9684 case MODE_V4SF:
9685 return "%vxorps\t%0, %d0";
9687 case MODE_OI:
9688 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9689 : "vpxor\t%x0, %x0, %x0";
9690 case MODE_V4DF:
9691 return "vxorpd\t%x0, %x0, %x0";
9692 case MODE_V8SF:
9693 return "vxorps\t%x0, %x0, %x0";
9695 default:
9696 break;
9699 case 2:
9700 if (TARGET_AVX512VL
9701 || get_attr_mode (insn) == MODE_XI
9702 || get_attr_mode (insn) == MODE_V8DF
9703 || get_attr_mode (insn) == MODE_V16SF)
9704 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9705 if (TARGET_AVX)
9706 return "vpcmpeqd\t%0, %0, %0";
9707 else
9708 return "pcmpeqd\t%0, %0";
9710 default:
9711 break;
9713 gcc_unreachable ();
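/* For example, an all-zeros V4SF constant is typically emitted as
   "xorps %xmm0, %xmm0" (or its VEX form), while an all-ones V4SI constant
   becomes "pcmpeqd %xmm0, %xmm0" with SSE2 or "vpternlogd" with AVX-512,
   so such constants never need a load from memory.  */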
9716 /* Returns true if OP contains a symbol reference */
9718 bool
9719 symbolic_reference_mentioned_p (rtx op)
9721 const char *fmt;
9722 int i;
9724 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9725 return true;
9727 fmt = GET_RTX_FORMAT (GET_CODE (op));
9728 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9730 if (fmt[i] == 'E')
9732 int j;
9734 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9735 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9736 return true;
9739 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9740 return true;
9743 return false;
9746 /* Return true if it is appropriate to emit `ret' instructions in the
9747 body of a function. Do this only if the epilogue is simple, needing a
9748 couple of insns. Prior to reloading, we can't tell how many registers
9749 must be saved, so return false then. Return false if there is no frame
9750 marker to de-allocate. */
9752 bool
9753 ix86_can_use_return_insn_p (void)
9755 struct ix86_frame frame;
9757 if (! reload_completed || frame_pointer_needed)
9758 return 0;
9760 /* Don't allow more than 32k pop, since that's all we can do
9761 with one instruction. */
9762 if (crtl->args.pops_args && crtl->args.size >= 32768)
9763 return 0;
9765 ix86_compute_frame_layout (&frame);
9766 return (frame.stack_pointer_offset == UNITS_PER_WORD
9767 && (frame.nregs + frame.nsseregs) == 0);
9770 /* Value should be nonzero if functions must have frame pointers.
9771 Zero means the frame pointer need not be set up (and parms may
9772 be accessed via the stack pointer) in functions that seem suitable. */
9774 static bool
9775 ix86_frame_pointer_required (void)
9777 /* If we accessed previous frames, then the generated code expects
9778 to be able to access the saved ebp value in our frame. */
9779 if (cfun->machine->accesses_prev_frame)
9780 return true;
9782 /* Several x86 OSes need a frame pointer for other reasons,
9783 usually pertaining to setjmp. */
9784 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9785 return true;
9787 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9788 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9789 return true;
9791 /* Under Win64 SEH, very large frames need a frame pointer, as the maximum
9792 stack allocation is 4GB. */
9793 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9794 return true;
9796 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9797 turns off the frame pointer by default. Turn it back on now if
9798 we've not got a leaf function. */
9799 if (TARGET_OMIT_LEAF_FRAME_POINTER
9800 && (!crtl->is_leaf
9801 || ix86_current_function_calls_tls_descriptor))
9802 return true;
9804 if (crtl->profile && !flag_fentry)
9805 return true;
9807 return false;
9810 /* Record that the current function accesses previous call frames. */
9812 void
9813 ix86_setup_frame_addresses (void)
9815 cfun->machine->accesses_prev_frame = 1;
9818 #ifndef USE_HIDDEN_LINKONCE
9819 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9820 # define USE_HIDDEN_LINKONCE 1
9821 # else
9822 # define USE_HIDDEN_LINKONCE 0
9823 # endif
9824 #endif
9826 static int pic_labels_used;
9828 /* Fills in the label name that should be used for a pc thunk for
9829 the given register. */
9831 static void
9832 get_pc_thunk_name (char name[32], unsigned int regno)
9834 gcc_assert (!TARGET_64BIT);
9836 if (USE_HIDDEN_LINKONCE)
9837 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9838 else
9839 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9843 /* This function generates code for -fpic that loads %ebx with
9844 the return address of the caller and then returns. */
9846 static void
9847 ix86_code_end (void)
9849 rtx xops[2];
9850 int regno;
9852 for (regno = AX_REG; regno <= SP_REG; regno++)
9854 char name[32];
9855 tree decl;
9857 if (!(pic_labels_used & (1 << regno)))
9858 continue;
9860 get_pc_thunk_name (name, regno);
9862 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9863 get_identifier (name),
9864 build_function_type_list (void_type_node, NULL_TREE));
9865 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9866 NULL_TREE, void_type_node);
9867 TREE_PUBLIC (decl) = 1;
9868 TREE_STATIC (decl) = 1;
9869 DECL_IGNORED_P (decl) = 1;
9871 #if TARGET_MACHO
9872 if (TARGET_MACHO)
9874 switch_to_section (darwin_sections[text_coal_section]);
9875 fputs ("\t.weak_definition\t", asm_out_file);
9876 assemble_name (asm_out_file, name);
9877 fputs ("\n\t.private_extern\t", asm_out_file);
9878 assemble_name (asm_out_file, name);
9879 putc ('\n', asm_out_file);
9880 ASM_OUTPUT_LABEL (asm_out_file, name);
9881 DECL_WEAK (decl) = 1;
9883 else
9884 #endif
9885 if (USE_HIDDEN_LINKONCE)
9887 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9889 targetm.asm_out.unique_section (decl, 0);
9890 switch_to_section (get_named_section (decl, NULL, 0));
9892 targetm.asm_out.globalize_label (asm_out_file, name);
9893 fputs ("\t.hidden\t", asm_out_file);
9894 assemble_name (asm_out_file, name);
9895 putc ('\n', asm_out_file);
9896 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9898 else
9900 switch_to_section (text_section);
9901 ASM_OUTPUT_LABEL (asm_out_file, name);
9904 DECL_INITIAL (decl) = make_node (BLOCK);
9905 current_function_decl = decl;
9906 init_function_start (decl);
9907 first_function_block_is_cold = false;
9908 /* Make sure unwind info is emitted for the thunk if needed. */
9909 final_start_function (emit_barrier (), asm_out_file, 1);
9911 /* Pad the stack-to-IP move with 4 instructions' worth of NOPs (two NOPs
9912 count as one instruction). */
9913 if (TARGET_PAD_SHORT_FUNCTION)
9915 int i = 8;
9917 while (i--)
9918 fputs ("\tnop\n", asm_out_file);
9921 xops[0] = gen_rtx_REG (Pmode, regno);
9922 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9923 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9924 output_asm_insn ("%!ret", NULL);
9925 final_end_function ();
9926 init_insn_lengths ();
9927 free_after_compilation (cfun);
9928 set_cfun (NULL);
9929 current_function_decl = NULL;
9932 if (flag_split_stack)
9933 file_end_indicate_split_stack ();
9936 /* Emit code for the SET_GOT patterns. */
9938 const char *
9939 output_set_got (rtx dest, rtx label)
9941 rtx xops[3];
9943 xops[0] = dest;
9945 if (TARGET_VXWORKS_RTP && flag_pic)
9947 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9948 xops[2] = gen_rtx_MEM (Pmode,
9949 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9950 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9952 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9953 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9954 an unadorned address. */
9955 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9956 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9957 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9958 return "";
9961 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9963 if (!flag_pic)
9965 if (TARGET_MACHO)
9966 /* We don't need a pic base, we're not producing pic. */
9967 gcc_unreachable ();
9969 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9970 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9971 targetm.asm_out.internal_label (asm_out_file, "L",
9972 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9974 else
9976 char name[32];
9977 get_pc_thunk_name (name, REGNO (dest));
9978 pic_labels_used |= 1 << REGNO (dest);
9980 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9981 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9982 output_asm_insn ("%!call\t%X2", xops);
9984 #if TARGET_MACHO
9985 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9986 This is what will be referenced by the Mach-O PIC subsystem. */
9987 if (machopic_should_output_picbase_label () || !label)
9988 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9990 /* When we are restoring the pic base at the site of a nonlocal label,
9991 and we decided to emit the pic base above, we will still output a
9992 local label used for calculating the correction offset (even though
9993 the offset will be 0 in that case). */
9994 if (label)
9995 targetm.asm_out.internal_label (asm_out_file, "L",
9996 CODE_LABEL_NUMBER (label));
9997 #endif
10000 if (!TARGET_MACHO)
10001 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
10003 return "";
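/* With -fpic on a typical 32-bit ELF target the sequence emitted above
   looks roughly like:

     call  __x86.get_pc_thunk.bx
     addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk (emitted by ix86_code_end) simply loads the return
   address from the stack and returns.  */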
10006 /* Generate a "push" pattern for input ARG. */
10008 static rtx
10009 gen_push (rtx arg)
10011 struct machine_function *m = cfun->machine;
10013 if (m->fs.cfa_reg == stack_pointer_rtx)
10014 m->fs.cfa_offset += UNITS_PER_WORD;
10015 m->fs.sp_offset += UNITS_PER_WORD;
10017 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10018 arg = gen_rtx_REG (word_mode, REGNO (arg));
10020 return gen_rtx_SET (gen_rtx_MEM (word_mode,
10021 gen_rtx_PRE_DEC (Pmode,
10022 stack_pointer_rtx)),
10023 arg);
10026 /* Generate a "pop" pattern for input ARG. */
10028 static rtx
10029 gen_pop (rtx arg)
10031 if (REG_P (arg) && GET_MODE (arg) != word_mode)
10032 arg = gen_rtx_REG (word_mode, REGNO (arg));
10034 return gen_rtx_SET (arg,
10035 gen_rtx_MEM (word_mode,
10036 gen_rtx_POST_INC (Pmode,
10037 stack_pointer_rtx)));
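/* On a 64-bit target the two helpers above produce RTL of the shape

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI <arg>))     push
     (set (reg:DI <arg>) (mem:DI (post_inc:DI (reg:DI sp))))    pop

   which later matches the push/pop patterns in i386.md; <arg> is only a
   placeholder for the operand.  */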
10040 /* Return >= 0 if there is an unused call-clobbered register available
10041 for the entire function. */
10043 static unsigned int
10044 ix86_select_alt_pic_regnum (void)
10046 if (ix86_use_pseudo_pic_reg ())
10047 return INVALID_REGNUM;
10049 if (crtl->is_leaf
10050 && !crtl->profile
10051 && !ix86_current_function_calls_tls_descriptor)
10053 int i, drap;
10054 /* Can't use the same register for both PIC and DRAP. */
10055 if (crtl->drap_reg)
10056 drap = REGNO (crtl->drap_reg);
10057 else
10058 drap = -1;
10059 for (i = 2; i >= 0; --i)
10060 if (i != drap && !df_regs_ever_live_p (i))
10061 return i;
10064 return INVALID_REGNUM;
10067 /* Return TRUE if we need to save REGNO. */
10069 static bool
10070 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
10072 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
10073 && pic_offset_table_rtx)
10075 if (ix86_use_pseudo_pic_reg ())
10077 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
10078 _mcount in prologue. */
10079 if (!TARGET_64BIT && flag_pic && crtl->profile)
10080 return true;
10082 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10083 || crtl->profile
10084 || crtl->calls_eh_return
10085 || crtl->uses_const_pool
10086 || cfun->has_nonlocal_label)
10087 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
10090 if (crtl->calls_eh_return && maybe_eh_return)
10092 unsigned i;
10093 for (i = 0; ; i++)
10095 unsigned test = EH_RETURN_DATA_REGNO (i);
10096 if (test == INVALID_REGNUM)
10097 break;
10098 if (test == regno)
10099 return true;
10103 if (crtl->drap_reg
10104 && regno == REGNO (crtl->drap_reg)
10105 && !cfun->machine->no_drap_save_restore)
10106 return true;
10108 return (df_regs_ever_live_p (regno)
10109 && !call_used_regs[regno]
10110 && !fixed_regs[regno]
10111 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10114 /* Return the number of saved general purpose registers. */
10116 static int
10117 ix86_nsaved_regs (void)
10119 int nregs = 0;
10120 int regno;
10122 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10123 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10124 nregs ++;
10125 return nregs;
10128 /* Return the number of saved SSE registers. */
10130 static int
10131 ix86_nsaved_sseregs (void)
10133 int nregs = 0;
10134 int regno;
10136 if (!TARGET_64BIT_MS_ABI)
10137 return 0;
10138 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10139 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10140 nregs ++;
10141 return nregs;
10144 /* Given FROM and TO register numbers, say whether this elimination is
10145 allowed. If stack alignment is needed, we can only replace argument
10146 pointer with hard frame pointer, or replace frame pointer with stack
10147 pointer. Otherwise, frame pointer elimination is automatically
10148 handled and all other eliminations are valid. */
10150 static bool
10151 ix86_can_eliminate (const int from, const int to)
10153 if (stack_realign_fp)
10154 return ((from == ARG_POINTER_REGNUM
10155 && to == HARD_FRAME_POINTER_REGNUM)
10156 || (from == FRAME_POINTER_REGNUM
10157 && to == STACK_POINTER_REGNUM));
10158 else
10159 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10162 /* Return the offset between two registers, one to be eliminated, and the other
10163 its replacement, at the start of a routine. */
10165 HOST_WIDE_INT
10166 ix86_initial_elimination_offset (int from, int to)
10168 struct ix86_frame frame;
10169 ix86_compute_frame_layout (&frame);
10171 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10172 return frame.hard_frame_pointer_offset;
10173 else if (from == FRAME_POINTER_REGNUM
10174 && to == HARD_FRAME_POINTER_REGNUM)
10175 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10176 else
10178 gcc_assert (to == STACK_POINTER_REGNUM);
10180 if (from == ARG_POINTER_REGNUM)
10181 return frame.stack_pointer_offset;
10183 gcc_assert (from == FRAME_POINTER_REGNUM);
10184 return frame.stack_pointer_offset - frame.frame_pointer_offset;
10188 /* In a dynamically-aligned function, we can't know the offset from
10189 stack pointer to frame pointer, so we must ensure that setjmp
10190 eliminates fp against the hard fp (%ebp) rather than trying to
10191 index from %esp up to the top of the frame across a gap that is
10192 of unknown (at compile-time) size. */
10193 static rtx
10194 ix86_builtin_setjmp_frame_value (void)
10196 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10199 /* When using -fsplit-stack, the allocation routines set a field in
10200 the TCB to the bottom of the stack plus this much space, measured
10201 in bytes. */
10203 #define SPLIT_STACK_AVAILABLE 256
10205 /* Fill in structure ix86_frame describing the frame of the currently compiled function. */
10207 static void
10208 ix86_compute_frame_layout (struct ix86_frame *frame)
10210 unsigned HOST_WIDE_INT stack_alignment_needed;
10211 HOST_WIDE_INT offset;
10212 unsigned HOST_WIDE_INT preferred_alignment;
10213 HOST_WIDE_INT size = get_frame_size ();
10214 HOST_WIDE_INT to_allocate;
10216 frame->nregs = ix86_nsaved_regs ();
10217 frame->nsseregs = ix86_nsaved_sseregs ();
10219 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
10220 for function prologues and leaf functions. */
10221 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10222 && (!crtl->is_leaf || cfun->calls_alloca != 0
10223 || ix86_current_function_calls_tls_descriptor))
10225 crtl->preferred_stack_boundary = 128;
10226 crtl->stack_alignment_needed = 128;
10228 /* preferred_stack_boundary is never updated for calls
10229 expanded from a tls descriptor.  Update it here.  We don't update it in
10230 the expand stage because, according to the comments before
10231 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
10232 away. */
10233 else if (ix86_current_function_calls_tls_descriptor
10234 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
10236 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
10237 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
10238 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
10241 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10242 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10244 gcc_assert (!size || stack_alignment_needed);
10245 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10246 gcc_assert (preferred_alignment <= stack_alignment_needed);
10248 /* For SEH we have to limit the amount of code movement into the prologue.
10249 At present we do this via a BLOCKAGE, at which point there's very little
10250 scheduling that can be done, which means that there's very little point
10251 in doing anything except PUSHs. */
10252 if (TARGET_SEH)
10253 cfun->machine->use_fast_prologue_epilogue = false;
10255 /* During reload iteration the number of registers saved can change.
10256 Recompute the value as needed.  Do not recompute when the number of registers
10257 didn't change, as reload makes multiple calls to the function and does not
10258 expect the decision to change within a single iteration. */
10259 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10260 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10262 int count = frame->nregs;
10263 struct cgraph_node *node = cgraph_node::get (current_function_decl);
10265 cfun->machine->use_fast_prologue_epilogue_nregs = count;
10267 /* The fast prologue uses move instead of push to save registers. This
10268 is significantly longer, but also executes faster as modern hardware
10269 can execute the moves in parallel, but can't do that for push/pop.
10271 Be careful about choosing which prologue to emit: when the function takes
10272 many instructions to execute, we may use the slow version, as well as
10273 when the function is known to be outside a hot spot (this is known with
10274 feedback only).  Weight the size of the function by the number of registers
10275 to save, as it is cheap to use one or two push instructions but very
10276 slow to use many of them. */
10277 if (count)
10278 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10279 if (node->frequency < NODE_FREQUENCY_NORMAL
10280 || (flag_branch_probabilities
10281 && node->frequency < NODE_FREQUENCY_HOT))
10282 cfun->machine->use_fast_prologue_epilogue = false;
10283 else
10284 cfun->machine->use_fast_prologue_epilogue
10285 = !expensive_function_p (count);
10288 frame->save_regs_using_mov
10289 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10290 /* If static stack checking is enabled and done with probes,
10291 the registers need to be saved before allocating the frame. */
10292 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10294 /* Skip return address. */
10295 offset = UNITS_PER_WORD;
10297 /* Skip pushed static chain. */
10298 if (ix86_static_chain_on_stack)
10299 offset += UNITS_PER_WORD;
10301 /* Skip saved base pointer. */
10302 if (frame_pointer_needed)
10303 offset += UNITS_PER_WORD;
10304 frame->hfp_save_offset = offset;
10306 /* The traditional frame pointer location is at the top of the frame. */
10307 frame->hard_frame_pointer_offset = offset;
10309 /* Register save area */
10310 offset += frame->nregs * UNITS_PER_WORD;
10311 frame->reg_save_offset = offset;
10313 /* On SEH target, registers are pushed just before the frame pointer
10314 location. */
10315 if (TARGET_SEH)
10316 frame->hard_frame_pointer_offset = offset;
10318 /* Align and set SSE register save area. */
10319 if (frame->nsseregs)
10321 /* The only ABI that has saved SSE registers (Win64) also has a
10322 16-byte aligned default stack, and thus we don't need to be
10323 within the re-aligned local stack frame to save them. */
10324 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
10325 offset = (offset + 16 - 1) & -16;
10326 offset += frame->nsseregs * 16;
10328 frame->sse_reg_save_offset = offset;
10330 /* The re-aligned stack starts here. Values before this point are not
10331 directly comparable with values below this point. In order to make
10332 sure that no value happens to be the same before and after, force
10333 the alignment computation below to add a non-zero value. */
10334 if (stack_realign_fp)
10335 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
10337 /* Va-arg area */
10338 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10339 offset += frame->va_arg_size;
10341 /* Align start of frame for local function. */
10342 if (stack_realign_fp
10343 || offset != frame->sse_reg_save_offset
10344 || size != 0
10345 || !crtl->is_leaf
10346 || cfun->calls_alloca
10347 || ix86_current_function_calls_tls_descriptor)
10348 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10350 /* Frame pointer points here. */
10351 frame->frame_pointer_offset = offset;
10353 offset += size;
10355 /* Add outgoing arguments area. Can be skipped if we eliminated
10356 all the function calls as dead code.
10357 Skipping is however impossible when function calls alloca. Alloca
10358 expander assumes that last crtl->outgoing_args_size
10359 of stack frame are unused. */
10360 if (ACCUMULATE_OUTGOING_ARGS
10361 && (!crtl->is_leaf || cfun->calls_alloca
10362 || ix86_current_function_calls_tls_descriptor))
10364 offset += crtl->outgoing_args_size;
10365 frame->outgoing_arguments_size = crtl->outgoing_args_size;
10367 else
10368 frame->outgoing_arguments_size = 0;
10370 /* Align stack boundary. Only needed if we're calling another function
10371 or using alloca. */
10372 if (!crtl->is_leaf || cfun->calls_alloca
10373 || ix86_current_function_calls_tls_descriptor)
10374 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10376 /* We've reached end of stack frame. */
10377 frame->stack_pointer_offset = offset;
10379 /* Size prologue needs to allocate. */
10380 to_allocate = offset - frame->sse_reg_save_offset;
10382 if ((!to_allocate && frame->nregs <= 1)
10383 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10384 frame->save_regs_using_mov = false;
10386 if (ix86_using_red_zone ()
10387 && crtl->sp_is_unchanging
10388 && crtl->is_leaf
10389 && !ix86_current_function_calls_tls_descriptor)
10391 frame->red_zone_size = to_allocate;
10392 if (frame->save_regs_using_mov)
10393 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10394 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10395 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10397 else
10398 frame->red_zone_size = 0;
10399 frame->stack_pointer_offset -= frame->red_zone_size;
10401 /* The SEH frame pointer location is near the bottom of the frame.
10402 This is enforced by the fact that the difference between the
10403 stack pointer and the frame pointer is limited to 240 bytes in
10404 the unwind data structure. */
10405 if (TARGET_SEH)
10407 HOST_WIDE_INT diff;
10409 /* If we can leave the frame pointer where it is, do so.  This also returns
10410 the establisher frame for __builtin_frame_address (0). */
10411 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10412 if (diff <= SEH_MAX_FRAME_SIZE
10413 && (diff > 240 || (diff & 15) != 0)
10414 && !crtl->accesses_prior_frames)
10416 /* Ideally we'd determine what portion of the local stack frame
10417 (within the constraint of the lowest 240) is most heavily used.
10418 But without that complication, simply bias the frame pointer
10419 by 128 bytes so as to maximize the amount of the local stack
10420 frame that is addressable with 8-bit offsets. */
10421 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
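/* With this 128-byte bias the bytes at [sp, sp + 255] become
   [fp - 128, fp + 127], so that whole slice of the local frame is reachable
   with a signed 8-bit displacement from the frame pointer (a rough
   illustration of the heuristic described above).  */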
10426 /* This is semi-inlined memory_address_length, but simplified
10427 since we know that we're always dealing with reg+offset, and
10428 to avoid having to create and discard all that rtl. */
10430 static inline int
10431 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10433 int len = 4;
10435 if (offset == 0)
10437 /* EBP and R13 cannot be encoded without an offset. */
10438 len = (regno == BP_REG || regno == R13_REG);
10440 else if (IN_RANGE (offset, -128, 127))
10441 len = 1;
10443 /* ESP and R12 must be encoded with a SIB byte. */
10444 if (regno == SP_REG || regno == R12_REG)
10445 len++;
10447 return len;
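/* A few illustrative results of the rules above: (eax, 0) -> 0, (ebp, 0) -> 1,
   (esp, 0) -> 1, (eax, -8) -> 1, (esp, -8) -> 2, (eax, 1024) -> 4,
   (r12, 1024) -> 5.  */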
10450 /* Return an RTX that points to CFA_OFFSET within the stack frame.
10451 The valid base registers are taken from CFUN->MACHINE->FS. */
10453 static rtx
10454 choose_baseaddr (HOST_WIDE_INT cfa_offset)
10456 const struct machine_function *m = cfun->machine;
10457 rtx base_reg = NULL;
10458 HOST_WIDE_INT base_offset = 0;
10460 if (m->use_fast_prologue_epilogue)
10462 /* Choose the base register most likely to allow the most scheduling
10463 opportunities. Generally FP is valid throughout the function,
10464 while DRAP must be reloaded within the epilogue. But choose either
10465 over the SP due to increased encoding size. */
10467 if (m->fs.fp_valid)
10469 base_reg = hard_frame_pointer_rtx;
10470 base_offset = m->fs.fp_offset - cfa_offset;
10472 else if (m->fs.drap_valid)
10474 base_reg = crtl->drap_reg;
10475 base_offset = 0 - cfa_offset;
10477 else if (m->fs.sp_valid)
10479 base_reg = stack_pointer_rtx;
10480 base_offset = m->fs.sp_offset - cfa_offset;
10483 else
10485 HOST_WIDE_INT toffset;
10486 int len = 16, tlen;
10488 /* Choose the base register with the smallest address encoding.
10489 With a tie, choose FP > DRAP > SP. */
10490 if (m->fs.sp_valid)
10492 base_reg = stack_pointer_rtx;
10493 base_offset = m->fs.sp_offset - cfa_offset;
10494 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10496 if (m->fs.drap_valid)
10498 toffset = 0 - cfa_offset;
10499 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10500 if (tlen <= len)
10502 base_reg = crtl->drap_reg;
10503 base_offset = toffset;
10504 len = tlen;
10507 if (m->fs.fp_valid)
10509 toffset = m->fs.fp_offset - cfa_offset;
10510 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10511 if (tlen <= len)
10513 base_reg = hard_frame_pointer_rtx;
10514 base_offset = toffset;
10515 len = tlen;
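/* Note that the "<=" comparisons above are what implement the FP > DRAP > SP
   tie-break: a later candidate with an equally short encoding replaces the
   earlier one.  */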
10519 gcc_assert (base_reg != NULL);
10521 return plus_constant (Pmode, base_reg, base_offset);
10524 /* Emit code to save registers in the prologue. */
10526 static void
10527 ix86_emit_save_regs (void)
10529 unsigned int regno;
10530 rtx_insn *insn;
10532 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10533 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10535 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10536 RTX_FRAME_RELATED_P (insn) = 1;
10540 /* Emit a single register save at CFA - CFA_OFFSET. */
10542 static void
10543 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10544 HOST_WIDE_INT cfa_offset)
10546 struct machine_function *m = cfun->machine;
10547 rtx reg = gen_rtx_REG (mode, regno);
10548 rtx mem, addr, base, insn;
10550 addr = choose_baseaddr (cfa_offset);
10551 mem = gen_frame_mem (mode, addr);
10553 /* For SSE saves, we need to indicate the 128-bit alignment. */
10554 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
10556 insn = emit_move_insn (mem, reg);
10557 RTX_FRAME_RELATED_P (insn) = 1;
10559 base = addr;
10560 if (GET_CODE (base) == PLUS)
10561 base = XEXP (base, 0);
10562 gcc_checking_assert (REG_P (base));
10564 /* When saving registers into a re-aligned local stack frame, avoid
10565 any tricky guessing by dwarf2out. */
10566 if (m->fs.realigned)
10568 gcc_checking_assert (stack_realign_drap);
10570 if (regno == REGNO (crtl->drap_reg))
10572 /* A bit of a hack. We force the DRAP register to be saved in
10573 the re-aligned stack frame, which provides us with a copy
10574 of the CFA that will last past the prologue. Install it. */
10575 gcc_checking_assert (cfun->machine->fs.fp_valid);
10576 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10577 cfun->machine->fs.fp_offset - cfa_offset);
10578 mem = gen_rtx_MEM (mode, addr);
10579 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10581 else
10583 /* The frame pointer is a stable reference within the
10584 aligned frame. Use it. */
10585 gcc_checking_assert (cfun->machine->fs.fp_valid);
10586 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10587 cfun->machine->fs.fp_offset - cfa_offset);
10588 mem = gen_rtx_MEM (mode, addr);
10589 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
10593 /* The memory may not be relative to the current CFA register,
10594 which means that we may need to generate a new pattern for
10595 use by the unwind info. */
10596 else if (base != m->fs.cfa_reg)
10598 addr = plus_constant (Pmode, m->fs.cfa_reg,
10599 m->fs.cfa_offset - cfa_offset);
10600 mem = gen_rtx_MEM (mode, addr);
10601 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
10605 /* Emit code to save registers using MOV insns.
10606 First register is stored at CFA - CFA_OFFSET. */
10607 static void
10608 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10610 unsigned int regno;
10612 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10613 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10615 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10616 cfa_offset -= UNITS_PER_WORD;
10620 /* Emit code to save SSE registers using MOV insns.
10621 First register is stored at CFA - CFA_OFFSET. */
10622 static void
10623 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10625 unsigned int regno;
10627 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10628 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10630 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10631 cfa_offset -= 16;
10635 static GTY(()) rtx queued_cfa_restores;
10637 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10638 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10639 Don't add the note if the previously saved value will be left untouched
10640 within the stack red-zone until return, as unwinders can find the same value
10641 in the register and on the stack. */
10643 static void
10644 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
10646 if (!crtl->shrink_wrapped
10647 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10648 return;
10650 if (insn)
10652 add_reg_note (insn, REG_CFA_RESTORE, reg);
10653 RTX_FRAME_RELATED_P (insn) = 1;
10655 else
10656 queued_cfa_restores
10657 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10660 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10662 static void
10663 ix86_add_queued_cfa_restore_notes (rtx insn)
10665 rtx last;
10666 if (!queued_cfa_restores)
10667 return;
10668 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10670 XEXP (last, 1) = REG_NOTES (insn);
10671 REG_NOTES (insn) = queued_cfa_restores;
10672 queued_cfa_restores = NULL_RTX;
10673 RTX_FRAME_RELATED_P (insn) = 1;
10676 /* Expand prologue or epilogue stack adjustment.
10677 The pattern exists to put a dependency on all ebp-based memory accesses.
10678 STYLE should be negative if instructions should be marked as frame related,
10679 zero if the %r11 register is live and cannot be freely used, and positive
10680 otherwise. */
10682 static void
10683 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10684 int style, bool set_cfa)
10686 struct machine_function *m = cfun->machine;
10687 rtx insn;
10688 bool add_frame_related_expr = false;
10690 if (Pmode == SImode)
10691 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10692 else if (x86_64_immediate_operand (offset, DImode))
10693 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10694 else
10696 rtx tmp;
10697 /* r11 is used by indirect sibcall return as well, set before the
10698 epilogue and used after the epilogue. */
10699 if (style)
10700 tmp = gen_rtx_REG (DImode, R11_REG);
10701 else
10703 gcc_assert (src != hard_frame_pointer_rtx
10704 && dest != hard_frame_pointer_rtx);
10705 tmp = hard_frame_pointer_rtx;
10707 insn = emit_insn (gen_rtx_SET (tmp, offset));
10708 if (style < 0)
10709 add_frame_related_expr = true;
10711 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10714 insn = emit_insn (insn);
10715 if (style >= 0)
10716 ix86_add_queued_cfa_restore_notes (insn);
10718 if (set_cfa)
10720 rtx r;
10722 gcc_assert (m->fs.cfa_reg == src);
10723 m->fs.cfa_offset += INTVAL (offset);
10724 m->fs.cfa_reg = dest;
10726 r = gen_rtx_PLUS (Pmode, src, offset);
10727 r = gen_rtx_SET (dest, r);
10728 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10729 RTX_FRAME_RELATED_P (insn) = 1;
10731 else if (style < 0)
10733 RTX_FRAME_RELATED_P (insn) = 1;
10734 if (add_frame_related_expr)
10736 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10737 r = gen_rtx_SET (dest, r);
10738 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10742 if (dest == stack_pointer_rtx)
10744 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10745 bool valid = m->fs.sp_valid;
10747 if (src == hard_frame_pointer_rtx)
10749 valid = m->fs.fp_valid;
10750 ooffset = m->fs.fp_offset;
10752 else if (src == crtl->drap_reg)
10754 valid = m->fs.drap_valid;
10755 ooffset = 0;
10757 else
10759 /* Else there are two possibilities: SP itself, which we set
10760 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10761 taken care of by hand along the eh_return path. */
10762 gcc_checking_assert (src == stack_pointer_rtx
10763 || offset == const0_rtx);
10766 m->fs.sp_offset = ooffset - INTVAL (offset);
10767 m->fs.sp_valid = valid;
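/* Typical prologue use, as seen later in ix86_expand_prologue (illustrative):

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   i.e. subtract ALLOCATE bytes from the stack pointer, mark the insns as
   frame related (STYLE == -1), and update the CFA only while the stack
   pointer is still the CFA register.  */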
10771 /* Find an available register to be used as dynamic realign argument
10772 pointer register. Such a register will be written in the prologue and
10773 used at the beginning of the body, so it must not be
10774 1. parameter passing register.
10775 2. GOT pointer.
10776 We reuse static-chain register if it is available. Otherwise, we
10777 use DI for i386 and R13 for x86-64. We chose R13 since it has
10778 shorter encoding.
10780 Return: the regno of chosen register. */
10782 static unsigned int
10783 find_drap_reg (void)
10785 tree decl = cfun->decl;
10787 if (TARGET_64BIT)
10789 /* Use R13 for a nested function or a function that needs a static chain.
10790 Since function with tail call may use any caller-saved
10791 registers in epilogue, DRAP must not use caller-saved
10792 register in such case. */
10793 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10794 return R13_REG;
10796 return R10_REG;
10798 else
10800 /* Use DI for a nested function or a function that needs a static chain.
10801 Since function with tail call may use any caller-saved
10802 registers in epilogue, DRAP must not use caller-saved
10803 register in such case. */
10804 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10805 return DI_REG;
10807 /* Reuse static chain register if it isn't used for parameter
10808 passing. */
10809 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10811 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10812 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10813 return CX_REG;
10815 return DI_REG;
10819 /* Return minimum incoming stack alignment. */
10821 static unsigned int
10822 ix86_minimum_incoming_stack_boundary (bool sibcall)
10824 unsigned int incoming_stack_boundary;
10826 /* Prefer the one specified at command line. */
10827 if (ix86_user_incoming_stack_boundary)
10828 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10829 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10830 if -mstackrealign is used, this is not a sibcall check, and the
10831 estimated stack alignment is 128 bits. */
10832 else if (!sibcall
10833 && !TARGET_64BIT
10834 && ix86_force_align_arg_pointer
10835 && crtl->stack_alignment_estimated == 128)
10836 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10837 else
10838 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10840 /* Incoming stack alignment can be changed on individual functions
10841 via force_align_arg_pointer attribute. We use the smallest
10842 incoming stack boundary. */
10843 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10844 && lookup_attribute (ix86_force_align_arg_pointer_string,
10845 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10846 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10848 /* The incoming stack frame has to be aligned at least at
10849 parm_stack_boundary. */
10850 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10851 incoming_stack_boundary = crtl->parm_stack_boundary;
10853 /* Stack at entrance of main is aligned by runtime. We use the
10854 smallest incoming stack boundary. */
10855 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10856 && DECL_NAME (current_function_decl)
10857 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10858 && DECL_FILE_SCOPE_P (current_function_decl))
10859 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10861 return incoming_stack_boundary;
10864 /* Update incoming stack boundary and estimated stack alignment. */
10866 static void
10867 ix86_update_stack_boundary (void)
10869 ix86_incoming_stack_boundary
10870 = ix86_minimum_incoming_stack_boundary (false);
10872 /* x86_64 varargs need a 16-byte stack alignment for the register save
10873 area. */
10874 if (TARGET_64BIT
10875 && cfun->stdarg
10876 && crtl->stack_alignment_estimated < 128)
10877 crtl->stack_alignment_estimated = 128;
10880 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10881 needed or an rtx for DRAP otherwise. */
10883 static rtx
10884 ix86_get_drap_rtx (void)
10886 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10887 crtl->need_drap = true;
10889 if (stack_realign_drap)
10891 /* Assign DRAP to vDRAP and return vDRAP. */
10892 unsigned int regno = find_drap_reg ();
10893 rtx drap_vreg;
10894 rtx arg_ptr;
10895 rtx_insn *seq, *insn;
10897 arg_ptr = gen_rtx_REG (Pmode, regno);
10898 crtl->drap_reg = arg_ptr;
10900 start_sequence ();
10901 drap_vreg = copy_to_reg (arg_ptr);
10902 seq = get_insns ();
10903 end_sequence ();
10905 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10906 if (!optimize)
10908 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10909 RTX_FRAME_RELATED_P (insn) = 1;
10911 return drap_vreg;
10913 else
10914 return NULL;
10917 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10919 static rtx
10920 ix86_internal_arg_pointer (void)
10922 return virtual_incoming_args_rtx;
10925 struct scratch_reg {
10926 rtx reg;
10927 bool saved;
10930 /* Return a short-lived scratch register for use on function entry.
10931 In 32-bit mode, it is valid only after the registers are saved
10932 in the prologue. This register must be released by means of
10933 release_scratch_register_on_entry once it is dead. */
10935 static void
10936 get_scratch_register_on_entry (struct scratch_reg *sr)
10938 int regno;
10940 sr->saved = false;
10942 if (TARGET_64BIT)
10944 /* We always use R11 in 64-bit mode. */
10945 regno = R11_REG;
10947 else
10949 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10950 bool fastcall_p
10951 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10952 bool thiscall_p
10953 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10954 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10955 int regparm = ix86_function_regparm (fntype, decl);
10956 int drap_regno
10957 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10959 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10960 for the static chain register. */
10961 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10962 && drap_regno != AX_REG)
10963 regno = AX_REG;
10964 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10965 for the static chain register. */
10966 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10967 regno = AX_REG;
10968 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10969 regno = DX_REG;
10970 /* ecx is the static chain register. */
10971 else if (regparm < 3 && !fastcall_p && !thiscall_p
10972 && !static_chain_p
10973 && drap_regno != CX_REG)
10974 regno = CX_REG;
10975 else if (ix86_save_reg (BX_REG, true))
10976 regno = BX_REG;
10977 /* esi is the static chain register. */
10978 else if (!(regparm == 3 && static_chain_p)
10979 && ix86_save_reg (SI_REG, true))
10980 regno = SI_REG;
10981 else if (ix86_save_reg (DI_REG, true))
10982 regno = DI_REG;
10983 else
10985 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10986 sr->saved = true;
10990 sr->reg = gen_rtx_REG (Pmode, regno);
10991 if (sr->saved)
10993 rtx_insn *insn = emit_insn (gen_push (sr->reg));
10994 RTX_FRAME_RELATED_P (insn) = 1;
10998 /* Release a scratch register obtained from the preceding function. */
11000 static void
11001 release_scratch_register_on_entry (struct scratch_reg *sr)
11003 if (sr->saved)
11005 struct machine_function *m = cfun->machine;
11006 rtx x, insn = emit_insn (gen_pop (sr->reg));
11008 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
11009 RTX_FRAME_RELATED_P (insn) = 1;
11010 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
11011 x = gen_rtx_SET (stack_pointer_rtx, x);
11012 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
11013 m->fs.sp_offset -= UNITS_PER_WORD;
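/* The two helpers above are always used as a bracketing pair, e.g.
   (sketch of the pattern used by the probing routines below):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit insns that use sr.reg as a temporary ...
     release_scratch_register_on_entry (&sr);

   so that any push emitted to free the register is undone by the matching
   pop.  */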
11017 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
11019 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
11021 static void
11022 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
11024 /* We skip the probe for the first interval + a small dope of 4 words and
11025 probe that many bytes past the specified size to maintain a protection
11026 area at the bottom of the stack. */
11027 const int dope = 4 * UNITS_PER_WORD;
11028 rtx size_rtx = GEN_INT (size), last;
11030 /* See if we have a constant small number of probes to generate. If so,
11031 that's the easy case. The run-time loop is made up of 11 insns in the
11032 generic case while the compile-time loop is made up of 3+2*(n-1) insns
11033 for n # of intervals. */
11034 if (size <= 5 * PROBE_INTERVAL)
11036 HOST_WIDE_INT i, adjust;
11037 bool first_probe = true;
11039 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
11040 values of N from 1 until it exceeds SIZE. If only one probe is
11041 needed, this will not generate any code. Then adjust and probe
11042 to PROBE_INTERVAL + SIZE. */
11043 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11045 if (first_probe)
11047 adjust = 2 * PROBE_INTERVAL + dope;
11048 first_probe = false;
11050 else
11051 adjust = PROBE_INTERVAL;
11053 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11054 plus_constant (Pmode, stack_pointer_rtx,
11055 -adjust)));
11056 emit_stack_probe (stack_pointer_rtx);
11059 if (first_probe)
11060 adjust = size + PROBE_INTERVAL + dope;
11061 else
11062 adjust = size + PROBE_INTERVAL - i;
11064 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11065 plus_constant (Pmode, stack_pointer_rtx,
11066 -adjust)));
11067 emit_stack_probe (stack_pointer_rtx);
11069 /* Adjust back to account for the additional first interval. */
11070 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11071 plus_constant (Pmode, stack_pointer_rtx,
11072 PROBE_INTERVAL + dope)));
11075 /* Otherwise, do the same as above, but in a loop. Note that we must be
11076 extra careful with variables wrapping around because we might be at
11077 the very top (or the very bottom) of the address space and we have
11078 to be able to handle this case properly; in particular, we use an
11079 equality test for the loop condition. */
11080 else
11082 HOST_WIDE_INT rounded_size;
11083 struct scratch_reg sr;
11085 get_scratch_register_on_entry (&sr);
11088 /* Step 1: round SIZE to the previous multiple of the interval. */
11090 rounded_size = size & -PROBE_INTERVAL;
11093 /* Step 2: compute initial and final value of the loop counter. */
11095 /* SP = SP_0 + PROBE_INTERVAL. */
11096 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11097 plus_constant (Pmode, stack_pointer_rtx,
11098 - (PROBE_INTERVAL + dope))));
11100 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
11101 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
11102 emit_insn (gen_rtx_SET (sr.reg,
11103 gen_rtx_PLUS (Pmode, sr.reg,
11104 stack_pointer_rtx)));
11107 /* Step 3: the loop
11109 while (SP != LAST_ADDR)
11111 SP = SP + PROBE_INTERVAL
11112 probe at SP
11115 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
11116 values of N from 1 until it is equal to ROUNDED_SIZE. */
11118 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
11121 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
11122 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
11124 if (size != rounded_size)
11126 emit_insn (gen_rtx_SET (stack_pointer_rtx,
11127 plus_constant (Pmode, stack_pointer_rtx,
11128 rounded_size - size)));
11129 emit_stack_probe (stack_pointer_rtx);
11132 /* Adjust back to account for the additional first interval. */
11133 last = emit_insn (gen_rtx_SET (stack_pointer_rtx,
11134 plus_constant (Pmode, stack_pointer_rtx,
11135 PROBE_INTERVAL + dope)));
11137 release_scratch_register_on_entry (&sr);
11140 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
11142 /* Even if the stack pointer isn't the CFA register, we need to correctly
11143 describe the adjustments made to it, in particular differentiate the
11144 frame-related ones from the frame-unrelated ones. */
11145 if (size > 0)
11147 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11148 XVECEXP (expr, 0, 0)
11149 = gen_rtx_SET (stack_pointer_rtx,
11150 plus_constant (Pmode, stack_pointer_rtx, -size));
11151 XVECEXP (expr, 0, 1)
11152 = gen_rtx_SET (stack_pointer_rtx,
11153 plus_constant (Pmode, stack_pointer_rtx,
11154 PROBE_INTERVAL + dope + size));
11155 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11156 RTX_FRAME_RELATED_P (last) = 1;
11158 cfun->machine->fs.sp_offset += size;
11161 /* Make sure nothing is scheduled before we are done. */
11162 emit_insn (gen_blockage ());
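/* Worked example of the small-size path above, assuming the default 4096-byte
   PROBE_INTERVAL and the 64-bit dope of 32 bytes: for SIZE == 8192 the
   emitted sequence is roughly

     sub  sp, 8224	; 2*PROBE_INTERVAL + dope, then probe
     sub  sp, 4096	; SIZE + PROBE_INTERVAL - i, then probe
     add  sp, 4128	; give back PROBE_INTERVAL + dope

   leaving the stack pointer exactly SIZE == 8192 bytes lower, with the probes
   reaching past the allocation to maintain the protection area described
   above.  */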
11165 /* Adjust the stack pointer up to REG while probing it. */
11167 const char *
11168 output_adjust_stack_and_probe (rtx reg)
11170 static int labelno = 0;
11171 char loop_lab[32], end_lab[32];
11172 rtx xops[2];
11174 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11175 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11177 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11179 /* Jump to END_LAB if SP == LAST_ADDR. */
11180 xops[0] = stack_pointer_rtx;
11181 xops[1] = reg;
11182 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11183 fputs ("\tje\t", asm_out_file);
11184 assemble_name_raw (asm_out_file, end_lab);
11185 fputc ('\n', asm_out_file);
11187 /* SP = SP + PROBE_INTERVAL. */
11188 xops[1] = GEN_INT (PROBE_INTERVAL);
11189 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11191 /* Probe at SP. */
11192 xops[1] = const0_rtx;
11193 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11195 fprintf (asm_out_file, "\tjmp\t");
11196 assemble_name_raw (asm_out_file, loop_lab);
11197 fputc ('\n', asm_out_file);
11199 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11201 return "";
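/* On x86-64 with AT&T syntax, and assuming the default 4096-byte
   PROBE_INTERVAL, the loop emitted above comes out roughly as (label names
   illustrative):

   .LPSRL0:
	cmpq	%r11, %rsp
	je	.LPSRE0
	subq	$4096, %rsp
	orq	$0, (%rsp)
	jmp	.LPSRL0
   .LPSRE0:
*/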
11204 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11205 inclusive. These are offsets from the current stack pointer. */
11207 static void
11208 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11210 /* See if we have a constant small number of probes to generate. If so,
11211 that's the easy case. The run-time loop is made up of 7 insns in the
11212 generic case while the compile-time loop is made up of n insns for n #
11213 of intervals. */
11214 if (size <= 7 * PROBE_INTERVAL)
11216 HOST_WIDE_INT i;
11218 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11219 it exceeds SIZE. If only one probe is needed, this will not
11220 generate any code. Then probe at FIRST + SIZE. */
11221 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11222 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11223 -(first + i)));
11225 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11226 -(first + size)));
11229 /* Otherwise, do the same as above, but in a loop. Note that we must be
11230 extra careful with variables wrapping around because we might be at
11231 the very top (or the very bottom) of the address space and we have
11232 to be able to handle this case properly; in particular, we use an
11233 equality test for the loop condition. */
11234 else
11236 HOST_WIDE_INT rounded_size, last;
11237 struct scratch_reg sr;
11239 get_scratch_register_on_entry (&sr);
11242 /* Step 1: round SIZE to the previous multiple of the interval. */
11244 rounded_size = size & -PROBE_INTERVAL;
11247 /* Step 2: compute initial and final value of the loop counter. */
11249 /* TEST_OFFSET = FIRST. */
11250 emit_move_insn (sr.reg, GEN_INT (-first));
11252 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
11253 last = first + rounded_size;
11256 /* Step 3: the loop
11258 while (TEST_ADDR != LAST_ADDR)
11260 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11261 probe at TEST_ADDR
11264 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11265 until it is equal to ROUNDED_SIZE. */
11267 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11270 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11271 that SIZE is equal to ROUNDED_SIZE. */
11273 if (size != rounded_size)
11274 emit_stack_probe (plus_constant (Pmode,
11275 gen_rtx_PLUS (Pmode,
11276 stack_pointer_rtx,
11277 sr.reg),
11278 rounded_size - size));
11280 release_scratch_register_on_entry (&sr);
11283 /* Make sure nothing is scheduled before we are done. */
11284 emit_insn (gen_blockage ());
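/* For instance, assuming the default 4096-byte PROBE_INTERVAL, a call with
   FIRST == STACK_CHECK_PROTECT and SIZE == 3 * 4096 takes the small-size path
   and emits exactly three probes, at sp - (FIRST + 4096), sp - (FIRST + 8192)
   and sp - (FIRST + 12288).  */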
11287 /* Probe a range of stack addresses from REG to END, inclusive. These are
11288 offsets from the current stack pointer. */
11290 const char *
11291 output_probe_stack_range (rtx reg, rtx end)
11293 static int labelno = 0;
11294 char loop_lab[32], end_lab[32];
11295 rtx xops[3];
11297 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11298 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11300 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11302 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
11303 xops[0] = reg;
11304 xops[1] = end;
11305 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11306 fputs ("\tje\t", asm_out_file);
11307 assemble_name_raw (asm_out_file, end_lab);
11308 fputc ('\n', asm_out_file);
11310 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
11311 xops[1] = GEN_INT (PROBE_INTERVAL);
11312 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11314 /* Probe at TEST_ADDR. */
11315 xops[0] = stack_pointer_rtx;
11316 xops[1] = reg;
11317 xops[2] = const0_rtx;
11318 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11320 fprintf (asm_out_file, "\tjmp\t");
11321 assemble_name_raw (asm_out_file, loop_lab);
11322 fputc ('\n', asm_out_file);
11324 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11326 return "";
11329 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
11330 to be generated in correct form. */
11331 static void
11332 ix86_finalize_stack_realign_flags (void)
11334 /* Check if stack realignment is really needed after reload, and
11335 store the result in cfun. */
11336 unsigned int incoming_stack_boundary
11337 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11338 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11339 unsigned int stack_realign = (incoming_stack_boundary
11340 < (crtl->is_leaf
11341 ? crtl->max_used_stack_slot_alignment
11342 : crtl->stack_alignment_needed));
11344 if (crtl->stack_realign_finalized)
11346 /* After stack_realign_needed is finalized, we can no longer
11347 change it. */
11348 gcc_assert (crtl->stack_realign_needed == stack_realign);
11349 return;
11352 /* If the only reason for frame_pointer_needed is that we conservatively
11353 assumed stack realignment might be needed, but in the end nothing that
11354 needed the stack alignment had been spilled, clear frame_pointer_needed
11355 and say we don't need stack realignment. */
11356 if (stack_realign
11357 && frame_pointer_needed
11358 && crtl->is_leaf
11359 && flag_omit_frame_pointer
11360 && crtl->sp_is_unchanging
11361 && !ix86_current_function_calls_tls_descriptor
11362 && !crtl->accesses_prior_frames
11363 && !cfun->calls_alloca
11364 && !crtl->calls_eh_return
11365 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
11366 && !ix86_frame_pointer_required ()
11367 && get_frame_size () == 0
11368 && ix86_nsaved_sseregs () == 0
11369 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11371 HARD_REG_SET set_up_by_prologue, prologue_used;
11372 basic_block bb;
11374 CLEAR_HARD_REG_SET (prologue_used);
11375 CLEAR_HARD_REG_SET (set_up_by_prologue);
11376 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11377 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11378 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11379 HARD_FRAME_POINTER_REGNUM);
11380 FOR_EACH_BB_FN (bb, cfun)
11382 rtx_insn *insn;
11383 FOR_BB_INSNS (bb, insn)
11384 if (NONDEBUG_INSN_P (insn)
11385 && requires_stack_frame_p (insn, prologue_used,
11386 set_up_by_prologue))
11388 crtl->stack_realign_needed = stack_realign;
11389 crtl->stack_realign_finalized = true;
11390 return;
11394 /* If drap has been set, but it actually isn't live at the start
11395 of the function, there is no reason to set it up. */
11396 if (crtl->drap_reg)
11398 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11399 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11401 crtl->drap_reg = NULL_RTX;
11402 crtl->need_drap = false;
11405 else
11406 cfun->machine->no_drap_save_restore = true;
11408 frame_pointer_needed = false;
11409 stack_realign = false;
11410 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11411 crtl->stack_alignment_needed = incoming_stack_boundary;
11412 crtl->stack_alignment_estimated = incoming_stack_boundary;
11413 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11414 crtl->preferred_stack_boundary = incoming_stack_boundary;
11415 df_finish_pass (true);
11416 df_scan_alloc (NULL);
11417 df_scan_blocks ();
11418 df_compute_regs_ever_live (true);
11419 df_analyze ();
11422 crtl->stack_realign_needed = stack_realign;
11423 crtl->stack_realign_finalized = true;
11426 /* Delete SET_GOT right after entry block if it is allocated to reg. */
11428 static void
11429 ix86_elim_entry_set_got (rtx reg)
11431 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11432 rtx_insn *c_insn = BB_HEAD (bb);
11433 if (!NONDEBUG_INSN_P (c_insn))
11434 c_insn = next_nonnote_nondebug_insn (c_insn);
11435 if (c_insn && NONJUMP_INSN_P (c_insn))
11437 rtx pat = PATTERN (c_insn);
11438 if (GET_CODE (pat) == PARALLEL)
11440 rtx vec = XVECEXP (pat, 0, 0);
11441 if (GET_CODE (vec) == SET
11442 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11443 && REGNO (XEXP (vec, 0)) == REGNO (reg))
11444 delete_insn (c_insn);
11449 /* Expand the prologue into a bunch of separate insns. */
11451 void
11452 ix86_expand_prologue (void)
11454 struct machine_function *m = cfun->machine;
11455 rtx insn, t;
11456 struct ix86_frame frame;
11457 HOST_WIDE_INT allocate;
11458 bool int_registers_saved;
11459 bool sse_registers_saved;
11460 rtx static_chain = NULL_RTX;
11462 ix86_finalize_stack_realign_flags ();
11464 /* DRAP should not coexist with stack_realign_fp */
11465 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11467 memset (&m->fs, 0, sizeof (m->fs));
11469 /* Initialize CFA state for before the prologue. */
11470 m->fs.cfa_reg = stack_pointer_rtx;
11471 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11473 /* Track SP offset to the CFA. We continue tracking this after we've
11474 swapped the CFA register away from SP. In the case of re-alignment
11475 this is fudged; we're interested in offsets within the local frame. */
11476 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11477 m->fs.sp_valid = true;
11479 ix86_compute_frame_layout (&frame);
11481 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11483 /* We should have already generated an error for any use of
11484 ms_hook on a nested function. */
11485 gcc_checking_assert (!ix86_static_chain_on_stack);
11487 /* Check if profiling is active and we shall use the profiling-before-
11488 prologue variant. If so, sorry. */
11489 if (crtl->profile && flag_fentry != 0)
11490 sorry ("ms_hook_prologue attribute isn%'t compatible "
11491 "with -mfentry for 32-bit");
11493 /* In ix86_asm_output_function_label we emitted:
11494 8b ff movl.s %edi,%edi
11495 55 push %ebp
11496 8b ec movl.s %esp,%ebp
11498 This matches the hookable function prologue in Win32 API
11499 functions in Microsoft Windows XP Service Pack 2 and newer.
11500 Wine uses this to enable Windows apps to hook the Win32 API
11501 functions provided by Wine.
11503 What that means is that we've already set up the frame pointer. */
11505 if (frame_pointer_needed
11506 && !(crtl->drap_reg && crtl->stack_realign_needed))
11508 rtx push, mov;
11510 /* We've decided to use the frame pointer already set up.
11511 Describe this to the unwinder by pretending that both
11512 push and mov insns happen right here.
11514 Putting the unwind info here at the end of the ms_hook
11515 is done so that we can make absolutely certain we get
11516 the required byte sequence at the start of the function,
11517 rather than relying on an assembler that can produce
11518 the exact encoding required.
11520 However it does mean (in the unpatched case) that we have
11521 a 1 insn window where the asynchronous unwind info is
11522 incorrect. However, if we placed the unwind info at
11523 its correct location we would have incorrect unwind info
11524 in the patched case. Which is probably all moot since
11525 I don't expect Wine generates dwarf2 unwind info for the
11526 system libraries that use this feature. */
11528 insn = emit_insn (gen_blockage ());
11530 push = gen_push (hard_frame_pointer_rtx);
11531 mov = gen_rtx_SET (hard_frame_pointer_rtx,
11532 stack_pointer_rtx);
11533 RTX_FRAME_RELATED_P (push) = 1;
11534 RTX_FRAME_RELATED_P (mov) = 1;
11536 RTX_FRAME_RELATED_P (insn) = 1;
11537 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11538 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11540 /* Note that gen_push incremented m->fs.cfa_offset, even
11541 though we didn't emit the push insn here. */
11542 m->fs.cfa_reg = hard_frame_pointer_rtx;
11543 m->fs.fp_offset = m->fs.cfa_offset;
11544 m->fs.fp_valid = true;
11546 else
11548 /* The frame pointer is not needed so pop %ebp again.
11549 This leaves us with a pristine state. */
11550 emit_insn (gen_pop (hard_frame_pointer_rtx));
11554 /* The first insn of a function that accepts its static chain on the
11555 stack is to push the register that would be filled in by a direct
11556 call. This insn will be skipped by the trampoline. */
11557 else if (ix86_static_chain_on_stack)
11559 static_chain = ix86_static_chain (cfun->decl, false);
11560 insn = emit_insn (gen_push (static_chain));
11561 emit_insn (gen_blockage ());
11563 /* We don't want to interpret this push insn as a register save,
11564 only as a stack adjustment. The real copy of the register as
11565 a save will be done later, if needed. */
11566 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11567 t = gen_rtx_SET (stack_pointer_rtx, t);
11568 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11569 RTX_FRAME_RELATED_P (insn) = 1;
11572 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11573 DRAP is needed and stack realignment is really needed after reload. */
11574 if (stack_realign_drap)
11576 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11578 /* Only need to push parameter pointer reg if it is caller saved. */
11579 if (!call_used_regs[REGNO (crtl->drap_reg)])
11581 /* Push arg pointer reg */
11582 insn = emit_insn (gen_push (crtl->drap_reg));
11583 RTX_FRAME_RELATED_P (insn) = 1;
11586 /* Grab the argument pointer. */
11587 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11588 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11589 RTX_FRAME_RELATED_P (insn) = 1;
11590 m->fs.cfa_reg = crtl->drap_reg;
11591 m->fs.cfa_offset = 0;
11593 /* Align the stack. */
11594 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11595 stack_pointer_rtx,
11596 GEN_INT (-align_bytes)));
11597 RTX_FRAME_RELATED_P (insn) = 1;
11599 /* Replicate the return address on the stack so that return
11600 address can be reached via (argp - 1) slot. This is needed
11601 to implement macro RETURN_ADDR_RTX and intrinsic function
11602 expand_builtin_return_addr etc. */
11603 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11604 t = gen_frame_mem (word_mode, t);
11605 insn = emit_insn (gen_push (t));
11606 RTX_FRAME_RELATED_P (insn) = 1;
11608 /* For the purposes of frame and register save area addressing,
11609 we've started over with a new frame. */
11610 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11611 m->fs.realigned = true;
11613 if (static_chain)
11615 /* Replicate static chain on the stack so that static chain
11616 can be reached via (argp - 2) slot. This is needed for
11617 nested function with stack realignment. */
11618 insn = emit_insn (gen_push (static_chain));
11619 RTX_FRAME_RELATED_P (insn) = 1;
11623 int_registers_saved = (frame.nregs == 0);
11624 sse_registers_saved = (frame.nsseregs == 0);
11626 if (frame_pointer_needed && !m->fs.fp_valid)
11628 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11629 slower on all targets. Also sdb doesn't like it. */
11630 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11631 RTX_FRAME_RELATED_P (insn) = 1;
11633 /* Push registers now, before setting the frame pointer
11634 on SEH target. */
11635 if (!int_registers_saved
11636 && TARGET_SEH
11637 && !frame.save_regs_using_mov)
11639 ix86_emit_save_regs ();
11640 int_registers_saved = true;
11641 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11644 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11646 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11647 RTX_FRAME_RELATED_P (insn) = 1;
11649 if (m->fs.cfa_reg == stack_pointer_rtx)
11650 m->fs.cfa_reg = hard_frame_pointer_rtx;
11651 m->fs.fp_offset = m->fs.sp_offset;
11652 m->fs.fp_valid = true;
11656 if (!int_registers_saved)
11658 /* If saving registers via PUSH, do so now. */
11659 if (!frame.save_regs_using_mov)
11661 ix86_emit_save_regs ();
11662 int_registers_saved = true;
11663 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11666 /* When using the red zone we may start register saving before allocating
11667 the stack frame, saving one cycle of the prologue. However, avoid
11668 doing this if we have to probe the stack; at least on x86_64 the
11669 stack probe can turn into a call that clobbers a red zone location. */
11670 else if (ix86_using_red_zone ()
11671 && (! TARGET_STACK_PROBE
11672 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11674 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11675 int_registers_saved = true;
11679 if (stack_realign_fp)
11681 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11682 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11684 /* The computation of the size of the re-aligned stack frame means
11685 that we must allocate the size of the register save area before
11686 performing the actual alignment. Otherwise we cannot guarantee
11687 that there's enough storage above the realignment point. */
11688 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11689 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11690 GEN_INT (m->fs.sp_offset
11691 - frame.sse_reg_save_offset),
11692 -1, false);
11694 /* Align the stack. */
11695 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11696 stack_pointer_rtx,
11697 GEN_INT (-align_bytes)));
11699 /* For the purposes of register save area addressing, the stack
11700 pointer is no longer valid. As for the value of sp_offset,
11701 see ix86_compute_frame_layout, which we need to match in order
11702 to pass verification of stack_pointer_offset at the end. */
11703 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11704 m->fs.sp_valid = false;
11707 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11709 if (flag_stack_usage_info)
11711 /* We start to count from ARG_POINTER. */
11712 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11714 /* If it was realigned, take into account the fake frame. */
11715 if (stack_realign_drap)
11717 if (ix86_static_chain_on_stack)
11718 stack_size += UNITS_PER_WORD;
11720 if (!call_used_regs[REGNO (crtl->drap_reg)])
11721 stack_size += UNITS_PER_WORD;
11723 /* This over-estimates by 1 minimal-stack-alignment-unit but
11724 mitigates that by counting in the new return address slot. */
11725 current_function_dynamic_stack_size
11726 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11729 current_function_static_stack_size = stack_size;
11732 /* On SEH target with very large frame size, allocate an area to save
11733 SSE registers (as the very large allocation won't be described). */
11734 if (TARGET_SEH
11735 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11736 && !sse_registers_saved)
11738 HOST_WIDE_INT sse_size =
11739 frame.sse_reg_save_offset - frame.reg_save_offset;
11741 gcc_assert (int_registers_saved);
11743 /* No need to do stack checking as the area will be immediately
11744 written. */
11745 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11746 GEN_INT (-sse_size), -1,
11747 m->fs.cfa_reg == stack_pointer_rtx);
11748 allocate -= sse_size;
11749 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11750 sse_registers_saved = true;
11753 /* The stack has already been decremented by the instruction calling us
11754 so probe if the size is non-negative to preserve the protection area. */
11755 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11757 /* We expect the registers to be saved when probes are used. */
11758 gcc_assert (int_registers_saved);
11760 if (STACK_CHECK_MOVING_SP)
11762 if (!(crtl->is_leaf && !cfun->calls_alloca
11763 && allocate <= PROBE_INTERVAL))
11765 ix86_adjust_stack_and_probe (allocate);
11766 allocate = 0;
11769 else
11771 HOST_WIDE_INT size = allocate;
11773 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11774 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11776 if (TARGET_STACK_PROBE)
11778 if (crtl->is_leaf && !cfun->calls_alloca)
11780 if (size > PROBE_INTERVAL)
11781 ix86_emit_probe_stack_range (0, size);
11783 else
11784 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11786 else
11788 if (crtl->is_leaf && !cfun->calls_alloca)
11790 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11791 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11792 size - STACK_CHECK_PROTECT);
11794 else
11795 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11800 if (allocate == 0)
11802 else if (!ix86_target_stack_probe ()
11803 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11805 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11806 GEN_INT (-allocate), -1,
11807 m->fs.cfa_reg == stack_pointer_rtx);
11809 else
11811 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11812 rtx r10 = NULL;
11813 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11814 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11815 bool eax_live = ix86_eax_live_at_start_p ();
11816 bool r10_live = false;
11818 if (TARGET_64BIT)
11819 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11821 if (eax_live)
11823 insn = emit_insn (gen_push (eax));
11824 allocate -= UNITS_PER_WORD;
11825 /* Note that SEH directives need to continue tracking the stack
11826 pointer even after the frame pointer has been set up. */
11827 if (sp_is_cfa_reg || TARGET_SEH)
11829 if (sp_is_cfa_reg)
11830 m->fs.cfa_offset += UNITS_PER_WORD;
11831 RTX_FRAME_RELATED_P (insn) = 1;
11832 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11833 gen_rtx_SET (stack_pointer_rtx,
11834 plus_constant (Pmode, stack_pointer_rtx,
11835 -UNITS_PER_WORD)));
11839 if (r10_live)
11841 r10 = gen_rtx_REG (Pmode, R10_REG);
11842 insn = emit_insn (gen_push (r10));
11843 allocate -= UNITS_PER_WORD;
11844 if (sp_is_cfa_reg || TARGET_SEH)
11846 if (sp_is_cfa_reg)
11847 m->fs.cfa_offset += UNITS_PER_WORD;
11848 RTX_FRAME_RELATED_P (insn) = 1;
11849 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11850 gen_rtx_SET (stack_pointer_rtx,
11851 plus_constant (Pmode, stack_pointer_rtx,
11852 -UNITS_PER_WORD)));
11856 emit_move_insn (eax, GEN_INT (allocate));
11857 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11859 /* Use the fact that AX still contains ALLOCATE. */
11860 adjust_stack_insn = (Pmode == DImode
11861 ? gen_pro_epilogue_adjust_stack_di_sub
11862 : gen_pro_epilogue_adjust_stack_si_sub);
11864 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11865 stack_pointer_rtx, eax));
11867 if (sp_is_cfa_reg || TARGET_SEH)
11869 if (sp_is_cfa_reg)
11870 m->fs.cfa_offset += allocate;
11871 RTX_FRAME_RELATED_P (insn) = 1;
11872 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11873 gen_rtx_SET (stack_pointer_rtx,
11874 plus_constant (Pmode, stack_pointer_rtx,
11875 -allocate)));
11877 m->fs.sp_offset += allocate;
11879 /* Use stack_pointer_rtx for relative addressing so that code
11880 works for realigned stack, too. */
11881 if (r10_live && eax_live)
11883 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11884 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11885 gen_frame_mem (word_mode, t));
11886 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11887 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11888 gen_frame_mem (word_mode, t));
11890 else if (eax_live || r10_live)
11892 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11893 emit_move_insn (gen_rtx_REG (word_mode,
11894 (eax_live ? AX_REG : R10_REG)),
11895 gen_frame_mem (word_mode, t));
11898 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11900 /* If we haven't already set up the frame pointer, do so now. */
11901 if (frame_pointer_needed && !m->fs.fp_valid)
11903 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11904 GEN_INT (frame.stack_pointer_offset
11905 - frame.hard_frame_pointer_offset));
11906 insn = emit_insn (insn);
11907 RTX_FRAME_RELATED_P (insn) = 1;
11908 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11910 if (m->fs.cfa_reg == stack_pointer_rtx)
11911 m->fs.cfa_reg = hard_frame_pointer_rtx;
11912 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11913 m->fs.fp_valid = true;
11916 if (!int_registers_saved)
11917 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11918 if (!sse_registers_saved)
11919 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11921 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
11922 in PROLOGUE. */
11923 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11925 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11926 insn = emit_insn (gen_set_got (pic));
11927 RTX_FRAME_RELATED_P (insn) = 1;
11928 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11929 emit_insn (gen_prologue_use (pic));
11930 /* Delete an already emitted SET_GOT if it exists and is allocated to
11931 REAL_PIC_OFFSET_TABLE_REGNUM. */
11932 ix86_elim_entry_set_got (pic);
11935 if (crtl->drap_reg && !crtl->stack_realign_needed)
11937 /* vDRAP is set up, but after reload it turns out stack realignment
11938 isn't necessary; emit prologue code here to set up DRAP
11939 without the stack realignment adjustment. */
11940 t = choose_baseaddr (0);
11941 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
11944 /* Prevent instructions from being scheduled into register save push
11945 sequence when access to the redzone area is done through frame pointer.
11946 The offset between the frame pointer and the stack pointer is calculated
11947 relative to the value of the stack pointer at the end of the function
11948 prologue, and moving instructions that access redzone area via frame
11949 pointer inside push sequence violates this assumption. */
11950 if (frame_pointer_needed && frame.red_zone_size)
11951 emit_insn (gen_memory_blockage ());
11953 /* Emit cld instruction if stringops are used in the function. */
11954 if (TARGET_CLD && ix86_current_function_needs_cld)
11955 emit_insn (gen_cld ());
11957 /* SEH requires that the prologue end within 256 bytes of the start of
11958 the function. Prevent instruction schedules that would extend that.
11959 Further, prevent alloca modifications to the stack pointer from being
11960 combined with prologue modifications. */
11961 if (TARGET_SEH)
11962 emit_insn (gen_prologue_use (stack_pointer_rtx));
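/* For a typical 64-bit non-leaf function with one call-saved GPR and no
   stack realignment, the paths above boil down to something like
   (illustrative AT&T assembly):

	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%rbx
	subq	$NN, %rsp	; NN = frame.stack_pointer_offset - m->fs.sp_offset

   i.e. set up the frame pointer, push the saved registers, then allocate the
   rest of the frame in one adjustment.  */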
11965 /* Emit code to restore REG using a POP insn. */
11967 static void
11968 ix86_emit_restore_reg_using_pop (rtx reg)
11970 struct machine_function *m = cfun->machine;
11971 rtx_insn *insn = emit_insn (gen_pop (reg));
11973 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11974 m->fs.sp_offset -= UNITS_PER_WORD;
11976 if (m->fs.cfa_reg == crtl->drap_reg
11977 && REGNO (reg) == REGNO (crtl->drap_reg))
11979 /* Previously we'd represented the CFA as an expression
11980 like *(%ebp - 8). We've just popped that value from
11981 the stack, which means we need to reset the CFA to
11982 the drap register. This will remain until we restore
11983 the stack pointer. */
11984 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11985 RTX_FRAME_RELATED_P (insn) = 1;
11987 /* This means that the DRAP register is valid for addressing too. */
11988 m->fs.drap_valid = true;
11989 return;
11992 if (m->fs.cfa_reg == stack_pointer_rtx)
11994 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11995 x = gen_rtx_SET (stack_pointer_rtx, x);
11996 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11997 RTX_FRAME_RELATED_P (insn) = 1;
11999 m->fs.cfa_offset -= UNITS_PER_WORD;
12002 /* When the frame pointer is the CFA, and we pop it, we are
12003 swapping back to the stack pointer as the CFA. This happens
12004 for stack frames that don't allocate other data, so we assume
12005 the stack pointer is now pointing at the return address, i.e.
12006 the function entry state, which makes the offset be 1 word. */
12007 if (reg == hard_frame_pointer_rtx)
12009 m->fs.fp_valid = false;
12010 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
12012 m->fs.cfa_reg = stack_pointer_rtx;
12013 m->fs.cfa_offset -= UNITS_PER_WORD;
12015 add_reg_note (insn, REG_CFA_DEF_CFA,
12016 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12017 GEN_INT (m->fs.cfa_offset)));
12018 RTX_FRAME_RELATED_P (insn) = 1;
12023 /* Emit code to restore saved registers using POP insns. */
12025 static void
12026 ix86_emit_restore_regs_using_pop (void)
12028 unsigned int regno;
12030 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12031 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
12032 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
12035 /* Emit code and notes for the LEAVE instruction. */
12037 static void
12038 ix86_emit_leave (void)
12040 struct machine_function *m = cfun->machine;
12041 rtx_insn *insn = emit_insn (ix86_gen_leave ());
12043 ix86_add_queued_cfa_restore_notes (insn);
12045 gcc_assert (m->fs.fp_valid);
12046 m->fs.sp_valid = true;
12047 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
12048 m->fs.fp_valid = false;
12050 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
12052 m->fs.cfa_reg = stack_pointer_rtx;
12053 m->fs.cfa_offset = m->fs.sp_offset;
12055 add_reg_note (insn, REG_CFA_DEF_CFA,
12056 plus_constant (Pmode, stack_pointer_rtx,
12057 m->fs.sp_offset));
12058 RTX_FRAME_RELATED_P (insn) = 1;
12060 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
12061 m->fs.fp_offset);
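/* The bookkeeping above mirrors what the hardware "leave" instruction does:
   it is equivalent to "mov %rbp, %rsp" followed by "pop %rbp", so afterwards
   the stack pointer is valid again at fp_offset - UNITS_PER_WORD and the
   frame pointer no longer points into the frame.  */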
12064 /* Emit code to restore saved registers using MOV insns.
12065 First register is restored from CFA - CFA_OFFSET. */
12066 static void
12067 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
12068 bool maybe_eh_return)
12070 struct machine_function *m = cfun->machine;
12071 unsigned int regno;
12073 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12074 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12076 rtx reg = gen_rtx_REG (word_mode, regno);
12077 rtx mem;
12078 rtx_insn *insn;
12080 mem = choose_baseaddr (cfa_offset);
12081 mem = gen_frame_mem (word_mode, mem);
12082 insn = emit_move_insn (reg, mem);
12084 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
12086 /* Previously we'd represented the CFA as an expression
12087 like *(%ebp - 8). We've just popped that value from
12088 the stack, which means we need to reset the CFA to
12089 the drap register. This will remain until we restore
12090 the stack pointer. */
12091 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
12092 RTX_FRAME_RELATED_P (insn) = 1;
12094 /* This means that the DRAP register is valid for addressing. */
12095 m->fs.drap_valid = true;
12097 else
12098 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12100 cfa_offset -= UNITS_PER_WORD;
12104 /* Emit code to restore saved SSE registers using MOV insns.
12105 First register is restored from CFA - CFA_OFFSET. */
12106 static void
12107 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
12108 bool maybe_eh_return)
12110 unsigned int regno;
12112 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12113 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12115 rtx reg = gen_rtx_REG (V4SFmode, regno);
12116 rtx mem;
12118 mem = choose_baseaddr (cfa_offset);
12119 mem = gen_rtx_MEM (V4SFmode, mem);
12120 set_mem_align (mem, 128);
12121 emit_move_insn (reg, mem);
12123 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
12125 cfa_offset -= 16;
12129 /* Restore function stack, frame, and registers. */
12131 void
12132 ix86_expand_epilogue (int style)
12134 struct machine_function *m = cfun->machine;
12135 struct machine_frame_state frame_state_save = m->fs;
12136 struct ix86_frame frame;
12137 bool restore_regs_via_mov;
12138 bool using_drap;
12140 ix86_finalize_stack_realign_flags ();
12141 ix86_compute_frame_layout (&frame);
12143 m->fs.sp_valid = (!frame_pointer_needed
12144 || (crtl->sp_is_unchanging
12145 && !stack_realign_fp));
12146 gcc_assert (!m->fs.sp_valid
12147 || m->fs.sp_offset == frame.stack_pointer_offset);
12149 /* The FP must be valid if the frame pointer is present. */
12150 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12151 gcc_assert (!m->fs.fp_valid
12152 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12154 /* We must have *some* valid pointer to the stack frame. */
12155 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12157 /* The DRAP is never valid at this point. */
12158 gcc_assert (!m->fs.drap_valid);
12160 /* See the comment about red zone and frame
12161 pointer usage in ix86_expand_prologue. */
12162 if (frame_pointer_needed && frame.red_zone_size)
12163 emit_insn (gen_memory_blockage ());
12165 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12166 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12168 /* Determine the CFA offset of the end of the red-zone. */
12169 m->fs.red_zone_offset = 0;
12170 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12172 /* The red-zone begins below the return address. */
12173 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12175 /* When the register save area is in the aligned portion of
12176 the stack, determine the maximum runtime displacement that
12177 matches up with the aligned frame. */
12178 if (stack_realign_drap)
12179 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12180 + UNITS_PER_WORD);
12183 /* Special care must be taken for the normal return case of a function
12184 using eh_return: the eax and edx registers are marked as saved, but
12185 not restored along this path. Adjust the save location to match. */
12186 if (crtl->calls_eh_return && style != 2)
12187 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12189 /* EH_RETURN requires the use of moves to function properly. */
12190 if (crtl->calls_eh_return)
12191 restore_regs_via_mov = true;
12192 /* SEH requires the use of pops to identify the epilogue. */
12193 else if (TARGET_SEH)
12194 restore_regs_via_mov = false;
12195 /* If we're only restoring one register and sp is not valid, then
12196 use a move instruction to restore the register, since it's
12197 less work than reloading sp and popping the register. */
12198 else if (!m->fs.sp_valid && frame.nregs <= 1)
12199 restore_regs_via_mov = true;
12200 else if (TARGET_EPILOGUE_USING_MOVE
12201 && cfun->machine->use_fast_prologue_epilogue
12202 && (frame.nregs > 1
12203 || m->fs.sp_offset != frame.reg_save_offset))
12204 restore_regs_via_mov = true;
12205 else if (frame_pointer_needed
12206 && !frame.nregs
12207 && m->fs.sp_offset != frame.reg_save_offset)
12208 restore_regs_via_mov = true;
12209 else if (frame_pointer_needed
12210 && TARGET_USE_LEAVE
12211 && cfun->machine->use_fast_prologue_epilogue
12212 && frame.nregs == 1)
12213 restore_regs_via_mov = true;
12214 else
12215 restore_regs_via_mov = false;
12217 if (restore_regs_via_mov || frame.nsseregs)
12219 /* Ensure that the entire register save area is addressable via
12220 the stack pointer, if we will restore via sp. */
12221 if (TARGET_64BIT
12222 && m->fs.sp_offset > 0x7fffffff
12223 && !(m->fs.fp_valid || m->fs.drap_valid)
12224 && (frame.nsseregs + frame.nregs) != 0)
12226 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12227 GEN_INT (m->fs.sp_offset
12228 - frame.sse_reg_save_offset),
12229 style,
12230 m->fs.cfa_reg == stack_pointer_rtx);
12234 /* If there are any SSE registers to restore, then we have to do it
12235 via moves, since there's obviously no pop for SSE regs. */
12236 if (frame.nsseregs)
12237 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12238 style == 2);
12240 if (restore_regs_via_mov)
12242 rtx t;
12244 if (frame.nregs)
12245 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12247 /* eh_return epilogues need %ecx added to the stack pointer. */
12248 if (style == 2)
12250 rtx sa = EH_RETURN_STACKADJ_RTX;
12251 rtx_insn *insn;
12253 /* Stack align doesn't work with eh_return. */
12254 gcc_assert (!stack_realign_drap);
12255 /* Neither do regparm nested functions. */
12256 gcc_assert (!ix86_static_chain_on_stack);
12258 if (frame_pointer_needed)
12260 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12261 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12262 emit_insn (gen_rtx_SET (sa, t));
12264 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12265 insn = emit_move_insn (hard_frame_pointer_rtx, t);
12267 /* Note that we use SA as a temporary CFA, as the return
12268 address is at the proper place relative to it. We
12269 pretend this happens at the FP restore insn because
12270 prior to this insn the FP would be stored at the wrong
12271 offset relative to SA, and after this insn we have no
12272 other reasonable register to use for the CFA. We don't
12273 bother resetting the CFA to the SP for the duration of
12274 the return insn. */
12275 add_reg_note (insn, REG_CFA_DEF_CFA,
12276 plus_constant (Pmode, sa, UNITS_PER_WORD));
12277 ix86_add_queued_cfa_restore_notes (insn);
12278 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12279 RTX_FRAME_RELATED_P (insn) = 1;
12281 m->fs.cfa_reg = sa;
12282 m->fs.cfa_offset = UNITS_PER_WORD;
12283 m->fs.fp_valid = false;
12285 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12286 const0_rtx, style, false);
12288 else
12290 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12291 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12292 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
12293 ix86_add_queued_cfa_restore_notes (insn);
12295 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12296 if (m->fs.cfa_offset != UNITS_PER_WORD)
12298 m->fs.cfa_offset = UNITS_PER_WORD;
12299 add_reg_note (insn, REG_CFA_DEF_CFA,
12300 plus_constant (Pmode, stack_pointer_rtx,
12301 UNITS_PER_WORD));
12302 RTX_FRAME_RELATED_P (insn) = 1;
12305 m->fs.sp_offset = UNITS_PER_WORD;
12306 m->fs.sp_valid = true;
12309 else
12311 /* SEH requires that the function end with (1) a stack adjustment
12312 if necessary, (2) a sequence of pops, and (3) a return or
12313 jump instruction. Prevent insns from the function body from
12314 being scheduled into this sequence. */
12315 if (TARGET_SEH)
12317 /* Prevent a catch region from being adjacent to the standard
12318 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
12319 several other flags that would be interesting to test are
12320 set up yet. */
12321 if (flag_non_call_exceptions)
12322 emit_insn (gen_nops (const1_rtx));
12323 else
12324 emit_insn (gen_blockage ());
12327 /* The first step is to deallocate the stack frame so that we can
12328 pop the registers. Also do it on SEH targets for very large
12329 frames, as the emitted instructions aren't allowed by the ABI in
12330 epilogues. */
12331 if (!m->fs.sp_valid
12332 || (TARGET_SEH
12333 && (m->fs.sp_offset - frame.reg_save_offset
12334 >= SEH_MAX_FRAME_SIZE)))
12336 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12337 GEN_INT (m->fs.fp_offset
12338 - frame.reg_save_offset),
12339 style, false);
12341 else if (m->fs.sp_offset != frame.reg_save_offset)
12343 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12344 GEN_INT (m->fs.sp_offset
12345 - frame.reg_save_offset),
12346 style,
12347 m->fs.cfa_reg == stack_pointer_rtx);
12350 ix86_emit_restore_regs_using_pop ();
12353 /* If we used a frame pointer and haven't already got rid of it,
12354 then do so now. */
12355 if (m->fs.fp_valid)
12357 /* If the stack pointer is valid and pointing at the frame
12358 pointer store address, then we only need a pop. */
12359 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12360 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12361 /* The 'leave' insn results in shorter dependency chains on CPUs that
12362 are able to grok it fast. */
12363 else if (TARGET_USE_LEAVE
12364 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12365 || !cfun->machine->use_fast_prologue_epilogue)
12366 ix86_emit_leave ();
12367 else
12369 pro_epilogue_adjust_stack (stack_pointer_rtx,
12370 hard_frame_pointer_rtx,
12371 const0_rtx, style, !using_drap);
12372 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12376 if (using_drap)
12378 int param_ptr_offset = UNITS_PER_WORD;
12379 rtx_insn *insn;
12381 gcc_assert (stack_realign_drap);
12383 if (ix86_static_chain_on_stack)
12384 param_ptr_offset += UNITS_PER_WORD;
12385 if (!call_used_regs[REGNO (crtl->drap_reg)])
12386 param_ptr_offset += UNITS_PER_WORD;
12388 insn = emit_insn (gen_rtx_SET
12389 (stack_pointer_rtx,
12390 gen_rtx_PLUS (Pmode,
12391 crtl->drap_reg,
12392 GEN_INT (-param_ptr_offset))));
12393 m->fs.cfa_reg = stack_pointer_rtx;
12394 m->fs.cfa_offset = param_ptr_offset;
12395 m->fs.sp_offset = param_ptr_offset;
12396 m->fs.realigned = false;
12398 add_reg_note (insn, REG_CFA_DEF_CFA,
12399 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12400 GEN_INT (param_ptr_offset)));
12401 RTX_FRAME_RELATED_P (insn) = 1;
12403 if (!call_used_regs[REGNO (crtl->drap_reg)])
12404 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12407 /* At this point the stack pointer must be valid, and we must have
12408 restored all of the registers. We may not have deallocated the
12409 entire stack frame. We've delayed this until now because it may
12410 be possible to merge the local stack deallocation with the
12411 deallocation forced by ix86_static_chain_on_stack. */
12412 gcc_assert (m->fs.sp_valid);
12413 gcc_assert (!m->fs.fp_valid);
12414 gcc_assert (!m->fs.realigned);
12415 if (m->fs.sp_offset != UNITS_PER_WORD)
12417 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12418 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12419 style, true);
12421 else
12422 ix86_add_queued_cfa_restore_notes (get_last_insn ());
12424 /* Sibcall epilogues don't want a return instruction. */
12425 if (style == 0)
12427 m->fs = frame_state_save;
12428 return;
12431 if (crtl->args.pops_args && crtl->args.size)
12433 rtx popc = GEN_INT (crtl->args.pops_args);
12435 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
12436 address, do an explicit add, and jump indirectly to the caller. */
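   /* Illustrative sketch of the >64K case handled just below (not the
      literal emitted assembly; pops_args stands for the actual count):

	 popl	%ecx			# pop the return address
	 addl	$pops_args, %esp	# explicitly pop the arguments
	 jmp	*%ecx			# return to the caller  */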
12438 if (crtl->args.pops_args >= 65536)
12440 rtx ecx = gen_rtx_REG (SImode, CX_REG);
12441 rtx_insn *insn;
12443 /* There is no "pascal" calling convention in any 64bit ABI. */
12444 gcc_assert (!TARGET_64BIT);
12446 insn = emit_insn (gen_pop (ecx));
12447 m->fs.cfa_offset -= UNITS_PER_WORD;
12448 m->fs.sp_offset -= UNITS_PER_WORD;
12450 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12451 x = gen_rtx_SET (stack_pointer_rtx, x);
12452 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12453 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
12454 RTX_FRAME_RELATED_P (insn) = 1;
12456 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12457 popc, -1, true);
12458 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12460 else
12461 emit_jump_insn (gen_simple_return_pop_internal (popc));
12463 else
12464 emit_jump_insn (gen_simple_return_internal ());
12466 /* Restore the state back to the state from the prologue,
12467 so that it's correct for the next epilogue. */
12468 m->fs = frame_state_save;
12471 /* Reset from the function's potential modifications. */
12473 static void
12474 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12476 if (pic_offset_table_rtx
12477 && !ix86_use_pseudo_pic_reg ())
12478 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12479 #if TARGET_MACHO
12480 /* Mach-O doesn't support labels at the end of objects, so if
12481 it looks like we might want one, insert a NOP. */
12483 rtx_insn *insn = get_last_insn ();
12484 rtx_insn *deleted_debug_label = NULL;
12485 while (insn
12486 && NOTE_P (insn)
12487 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12489 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
12490 notes; instead set their CODE_LABEL_NUMBER to -1, as
12491 otherwise there would be code generation differences
12492 between -g and -g0. */
12493 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12494 deleted_debug_label = insn;
12495 insn = PREV_INSN (insn);
12497 if (insn
12498 && (LABEL_P (insn)
12499 || (NOTE_P (insn)
12500 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12501 fputs ("\tnop\n", file);
12502 else if (deleted_debug_label)
12503 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12504 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12505 CODE_LABEL_NUMBER (insn) = -1;
12507 #endif
12511 /* Return a scratch register to use in the split stack prologue. The
12512 split stack prologue is used for -fsplit-stack. It consists of the
12513 first instructions in the function, even before the regular prologue.
12514 The scratch register can be any caller-saved register which is not
12515 used for parameters or for the static chain. */
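/* For illustration, a summary of the cases handled below (this reads the
   code, it is not a separate specification): 64-bit targets always use
   %r11.  On 32-bit targets, up to two register parameters with no static
   chain use %ecx; with a static chain and at most one register parameter,
   %edx; fastcall uses %eax (and is rejected with a static chain); thiscall
   uses %edx, or %eax with a static chain; the remaining combinations are
   rejected with sorry ().  */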
12517 static unsigned int
12518 split_stack_prologue_scratch_regno (void)
12520 if (TARGET_64BIT)
12521 return R11_REG;
12522 else
12524 bool is_fastcall, is_thiscall;
12525 int regparm;
12527 is_fastcall = (lookup_attribute ("fastcall",
12528 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12529 != NULL);
12530 is_thiscall = (lookup_attribute ("thiscall",
12531 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12532 != NULL);
12533 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12535 if (is_fastcall)
12537 if (DECL_STATIC_CHAIN (cfun->decl))
12539 sorry ("-fsplit-stack does not support fastcall with "
12540 "nested function");
12541 return INVALID_REGNUM;
12543 return AX_REG;
12545 else if (is_thiscall)
12547 if (!DECL_STATIC_CHAIN (cfun->decl))
12548 return DX_REG;
12549 return AX_REG;
12551 else if (regparm < 3)
12553 if (!DECL_STATIC_CHAIN (cfun->decl))
12554 return CX_REG;
12555 else
12557 if (regparm >= 2)
12559 sorry ("-fsplit-stack does not support 2 register "
12560 "parameters for a nested function");
12561 return INVALID_REGNUM;
12563 return DX_REG;
12566 else
12568 /* FIXME: We could make this work by pushing a register
12569 around the addition and comparison. */
12570 sorry ("-fsplit-stack does not support 3 register parameters");
12571 return INVALID_REGNUM;
12576 /* A SYMBOL_REF for the function which allocates new stack space for
12577 -fsplit-stack. */
12579 static GTY(()) rtx split_stack_fn;
12581 /* A SYMBOL_REF for the more stack function when using the large
12582 model. */
12584 static GTY(()) rtx split_stack_fn_large;
12586 /* Handle -fsplit-stack. These are the first instructions in the
12587 function, even before the regular prologue. */
12589 void
12590 ix86_expand_split_stack_prologue (void)
12592 struct ix86_frame frame;
12593 HOST_WIDE_INT allocate;
12594 unsigned HOST_WIDE_INT args_size;
12595 rtx_code_label *label;
12596 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12597 rtx scratch_reg = NULL_RTX;
12598 rtx_code_label *varargs_label = NULL;
12599 rtx fn;
12601 gcc_assert (flag_split_stack && reload_completed);
12603 ix86_finalize_stack_realign_flags ();
12604 ix86_compute_frame_layout (&frame);
12605 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12607 /* This is the label we will branch to if we have enough stack
12608 space. We expect the basic block reordering pass to reverse this
12609 branch if optimizing, so that we branch in the unlikely case. */
12610 label = gen_label_rtx ();
12612 /* We need to compare the stack pointer minus the frame size with
12613 the stack boundary in the TCB. The stack boundary always gives
12614 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12615 can compare directly. Otherwise we need to do an addition. */
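   /* Conceptually (pseudo-code only, not the emitted RTL; "limit" is the
      stack boundary read from the TCB below):

	 if (current >= limit)
	   goto label;		-- enough stack, continue with the prologue
	 call __morestack	-- otherwise grow the stack

      where "current" is the stack pointer itself when ALLOCATE is below
      SPLIT_STACK_AVAILABLE, and a scratch register holding sp - allocate
      otherwise.  */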
12617 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12618 UNSPEC_STACK_CHECK);
12619 limit = gen_rtx_CONST (Pmode, limit);
12620 limit = gen_rtx_MEM (Pmode, limit);
12621 if (allocate < SPLIT_STACK_AVAILABLE)
12622 current = stack_pointer_rtx;
12623 else
12625 unsigned int scratch_regno;
12626 rtx offset;
12628 /* We need a scratch register to hold the stack pointer minus
12629 the required frame size. Since this is the very start of the
12630 function, the scratch register can be any caller-saved
12631 register which is not used for parameters. */
12632 offset = GEN_INT (- allocate);
12633 scratch_regno = split_stack_prologue_scratch_regno ();
12634 if (scratch_regno == INVALID_REGNUM)
12635 return;
12636 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12637 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12639 /* We don't use ix86_gen_add3 in this case because it will
12640 want to split to lea, but when not optimizing the insn
12641 will not be split after this point. */
12642 emit_insn (gen_rtx_SET (scratch_reg,
12643 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12644 offset)));
12646 else
12648 emit_move_insn (scratch_reg, offset);
12649 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12650 stack_pointer_rtx));
12652 current = scratch_reg;
12655 ix86_expand_branch (GEU, current, limit, label);
12656 jump_insn = get_last_insn ();
12657 JUMP_LABEL (jump_insn) = label;
12659 /* Mark the jump as very likely to be taken. */
12660 add_int_reg_note (jump_insn, REG_BR_PROB,
12661 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12663 if (split_stack_fn == NULL_RTX)
12665 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12666 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12668 fn = split_stack_fn;
12670 /* Get more stack space. We pass in the desired stack space and the
12671 size of the arguments to copy to the new stack. In 32-bit mode
12672 we push the parameters; __morestack will return on a new stack
12673 anyhow. In 64-bit mode we pass the parameters in r10 and
12674 r11. */
12675 allocate_rtx = GEN_INT (allocate);
12676 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12677 call_fusage = NULL_RTX;
12678 if (TARGET_64BIT)
12680 rtx reg10, reg11;
12682 reg10 = gen_rtx_REG (Pmode, R10_REG);
12683 reg11 = gen_rtx_REG (Pmode, R11_REG);
12685 /* If this function uses a static chain, it will be in %r10.
12686 Preserve it across the call to __morestack. */
12687 if (DECL_STATIC_CHAIN (cfun->decl))
12689 rtx rax;
12691 rax = gen_rtx_REG (word_mode, AX_REG);
12692 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12693 use_reg (&call_fusage, rax);
12696 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12697 && !TARGET_PECOFF)
12699 HOST_WIDE_INT argval;
12701 gcc_assert (Pmode == DImode);
12702 /* When using the large model we need to load the address
12703 into a register, and we've run out of registers. So we
12704 switch to a different calling convention, and we call a
12705 different function: __morestack_large_model. We pass the
12706 argument size in the upper 32 bits of r10 and pass the
12707 frame size in the lower 32 bits. */
12708 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12709 gcc_assert ((args_size & 0xffffffff) == args_size);
12711 if (split_stack_fn_large == NULL_RTX)
12713 split_stack_fn_large =
12714 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12715 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12717 if (ix86_cmodel == CM_LARGE_PIC)
12719 rtx_code_label *label;
12720 rtx x;
12722 label = gen_label_rtx ();
12723 emit_label (label);
12724 LABEL_PRESERVE_P (label) = 1;
12725 emit_insn (gen_set_rip_rex64 (reg10, label));
12726 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12727 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12728 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12729 UNSPEC_GOT);
12730 x = gen_rtx_CONST (Pmode, x);
12731 emit_move_insn (reg11, x);
12732 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12733 x = gen_const_mem (Pmode, x);
12734 emit_move_insn (reg11, x);
12736 else
12737 emit_move_insn (reg11, split_stack_fn_large);
12739 fn = reg11;
12741 argval = ((args_size << 16) << 16) + allocate;
12742 emit_move_insn (reg10, GEN_INT (argval));
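	  /* Worked example with illustrative values: args_size == 0x18 and
	     allocate == 0x200 give
	       argval = (0x18 << 32) + 0x200 = 0x0000001800000200,
	     i.e. the argument size lands in the upper 32 bits of %r10 and
	     the frame size in the lower 32 bits, as described above.  */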
12744 else
12746 emit_move_insn (reg10, allocate_rtx);
12747 emit_move_insn (reg11, GEN_INT (args_size));
12748 use_reg (&call_fusage, reg11);
12751 use_reg (&call_fusage, reg10);
12753 else
12755 emit_insn (gen_push (GEN_INT (args_size)));
12756 emit_insn (gen_push (allocate_rtx));
12758 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12759 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12760 NULL_RTX, false);
12761 add_function_usage_to (call_insn, call_fusage);
12763 /* In order to make call/return prediction work right, we now need
12764 to execute a return instruction. See
12765 libgcc/config/i386/morestack.S for the details on how this works.
12767 For flow purposes gcc must not see this as a return
12768 instruction--we need control flow to continue at the subsequent
12769 label. Therefore, we use an unspec. */
12770 gcc_assert (crtl->args.pops_args < 65536);
12771 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12773 /* If we are in 64-bit mode and this function uses a static chain,
12774 we saved %r10 in %rax before calling __morestack. */
12775 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12776 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12777 gen_rtx_REG (word_mode, AX_REG));
12779 /* If this function calls va_start, we need to store a pointer to
12780 the arguments on the old stack, because they may not have been
12781 all copied to the new stack. At this point the old stack can be
12782 found at the frame pointer value used by __morestack, because
12783 __morestack has set that up before calling back to us. Here we
12784 store that pointer in a scratch register, and in
12785 ix86_expand_prologue we store the scratch register in a stack
12786 slot. */
12787 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12789 unsigned int scratch_regno;
12790 rtx frame_reg;
12791 int words;
12793 scratch_regno = split_stack_prologue_scratch_regno ();
12794 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12795 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12797 /* 64-bit:
12798 fp -> old fp value
12799 return address within this function
12800 return address of caller of this function
12801 stack arguments
12802 So we add three words to get to the stack arguments.
12804 32-bit:
12805 fp -> old fp value
12806 return address within this function
12807 first argument to __morestack
12808 second argument to __morestack
12809 return address of caller of this function
12810 stack arguments
12811 So we add five words to get to the stack arguments.
12813 words = TARGET_64BIT ? 3 : 5;
12814 emit_insn (gen_rtx_SET (scratch_reg,
12815 gen_rtx_PLUS (Pmode, frame_reg,
12816 GEN_INT (words * UNITS_PER_WORD))));
12818 varargs_label = gen_label_rtx ();
12819 emit_jump_insn (gen_jump (varargs_label));
12820 JUMP_LABEL (get_last_insn ()) = varargs_label;
12822 emit_barrier ();
12825 emit_label (label);
12826 LABEL_NUSES (label) = 1;
12828 /* If this function calls va_start, we now have to set the scratch
12829 register for the case where we do not call __morestack. In this
12830 case we need to set it based on the stack pointer. */
12831 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12833 emit_insn (gen_rtx_SET (scratch_reg,
12834 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12835 GEN_INT (UNITS_PER_WORD))));
12837 emit_label (varargs_label);
12838 LABEL_NUSES (varargs_label) = 1;
12842 /* We may have to tell the dataflow pass that the split stack prologue
12843 is initializing a scratch register. */
12845 static void
12846 ix86_live_on_entry (bitmap regs)
12848 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12850 gcc_assert (flag_split_stack);
12851 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12855 /* Extract the parts of an RTL expression that is a valid memory address
12856 for an instruction. Return 0 if the structure of the address is
12857 grossly off. Return -1 if the address contains ASHIFT, so it is not
12858 strictly valid, but is still used to compute the length of an lea insn. */
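/* Illustrative example (schematic RTL, not taken from a dump): the
   canonical SImode address

     (plus:SI (plus:SI (mult:SI (reg:SI %ecx) (const_int 4))
		       (reg:SI %ebx))
	      (const_int 12))

   decomposes into base = %ebx, index = %ecx, scale = 4, disp = 12 and
   seg = SEG_DEFAULT, i.e. the operand written as "12(%ebx,%ecx,4)".  */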
12861 ix86_decompose_address (rtx addr, struct ix86_address *out)
12863 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12864 rtx base_reg, index_reg;
12865 HOST_WIDE_INT scale = 1;
12866 rtx scale_rtx = NULL_RTX;
12867 rtx tmp;
12868 int retval = 1;
12869 enum ix86_address_seg seg = SEG_DEFAULT;
12871 /* Allow zero-extended SImode addresses;
12872 they will be emitted with the addr32 prefix. */
12873 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12875 if (GET_CODE (addr) == ZERO_EXTEND
12876 && GET_MODE (XEXP (addr, 0)) == SImode)
12878 addr = XEXP (addr, 0);
12879 if (CONST_INT_P (addr))
12880 return 0;
12882 else if (GET_CODE (addr) == AND
12883 && const_32bit_mask (XEXP (addr, 1), DImode))
12885 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12886 if (addr == NULL_RTX)
12887 return 0;
12889 if (CONST_INT_P (addr))
12890 return 0;
12894 /* Allow SImode subregs of DImode addresses;
12895 they will be emitted with the addr32 prefix. */
12896 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12898 if (GET_CODE (addr) == SUBREG
12899 && GET_MODE (SUBREG_REG (addr)) == DImode)
12901 addr = SUBREG_REG (addr);
12902 if (CONST_INT_P (addr))
12903 return 0;
12907 if (REG_P (addr))
12908 base = addr;
12909 else if (GET_CODE (addr) == SUBREG)
12911 if (REG_P (SUBREG_REG (addr)))
12912 base = addr;
12913 else
12914 return 0;
12916 else if (GET_CODE (addr) == PLUS)
12918 rtx addends[4], op;
12919 int n = 0, i;
12921 op = addr;
12924 if (n >= 4)
12925 return 0;
12926 addends[n++] = XEXP (op, 1);
12927 op = XEXP (op, 0);
12929 while (GET_CODE (op) == PLUS);
12930 if (n >= 4)
12931 return 0;
12932 addends[n] = op;
12934 for (i = n; i >= 0; --i)
12936 op = addends[i];
12937 switch (GET_CODE (op))
12939 case MULT:
12940 if (index)
12941 return 0;
12942 index = XEXP (op, 0);
12943 scale_rtx = XEXP (op, 1);
12944 break;
12946 case ASHIFT:
12947 if (index)
12948 return 0;
12949 index = XEXP (op, 0);
12950 tmp = XEXP (op, 1);
12951 if (!CONST_INT_P (tmp))
12952 return 0;
12953 scale = INTVAL (tmp);
12954 if ((unsigned HOST_WIDE_INT) scale > 3)
12955 return 0;
12956 scale = 1 << scale;
12957 break;
12959 case ZERO_EXTEND:
12960 op = XEXP (op, 0);
12961 if (GET_CODE (op) != UNSPEC)
12962 return 0;
12963 /* FALLTHRU */
12965 case UNSPEC:
12966 if (XINT (op, 1) == UNSPEC_TP
12967 && TARGET_TLS_DIRECT_SEG_REFS
12968 && seg == SEG_DEFAULT)
12969 seg = DEFAULT_TLS_SEG_REG;
12970 else
12971 return 0;
12972 break;
12974 case SUBREG:
12975 if (!REG_P (SUBREG_REG (op)))
12976 return 0;
12977 /* FALLTHRU */
12979 case REG:
12980 if (!base)
12981 base = op;
12982 else if (!index)
12983 index = op;
12984 else
12985 return 0;
12986 break;
12988 case CONST:
12989 case CONST_INT:
12990 case SYMBOL_REF:
12991 case LABEL_REF:
12992 if (disp)
12993 return 0;
12994 disp = op;
12995 break;
12997 default:
12998 return 0;
13002 else if (GET_CODE (addr) == MULT)
13004 index = XEXP (addr, 0); /* index*scale */
13005 scale_rtx = XEXP (addr, 1);
13007 else if (GET_CODE (addr) == ASHIFT)
13009 /* We're called for lea too, which implements ashift on occasion. */
13010 index = XEXP (addr, 0);
13011 tmp = XEXP (addr, 1);
13012 if (!CONST_INT_P (tmp))
13013 return 0;
13014 scale = INTVAL (tmp);
13015 if ((unsigned HOST_WIDE_INT) scale > 3)
13016 return 0;
13017 scale = 1 << scale;
13018 retval = -1;
13020 else
13021 disp = addr; /* displacement */
13023 if (index)
13025 if (REG_P (index))
13027 else if (GET_CODE (index) == SUBREG
13028 && REG_P (SUBREG_REG (index)))
13030 else
13031 return 0;
13034 /* Extract the integral value of scale. */
13035 if (scale_rtx)
13037 if (!CONST_INT_P (scale_rtx))
13038 return 0;
13039 scale = INTVAL (scale_rtx);
13042 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
13043 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
13045 /* Avoid useless 0 displacement. */
13046 if (disp == const0_rtx && (base || index))
13047 disp = NULL_RTX;
13049 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
13050 if (base_reg && index_reg && scale == 1
13051 && (index_reg == arg_pointer_rtx
13052 || index_reg == frame_pointer_rtx
13053 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
13055 std::swap (base, index);
13056 std::swap (base_reg, index_reg);
13059 /* Special case: %ebp cannot be encoded as a base without a displacement.
13060 Similarly %r13. */
13061 if (!disp
13062 && base_reg
13063 && (base_reg == hard_frame_pointer_rtx
13064 || base_reg == frame_pointer_rtx
13065 || base_reg == arg_pointer_rtx
13066 || (REG_P (base_reg)
13067 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
13068 || REGNO (base_reg) == R13_REG))))
13069 disp = const0_rtx;
13071 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
13072 Avoid this by transforming to [%esi+0].
13073 Reload calls address legitimization without cfun defined, so we need
13074 to test cfun for being non-NULL. */
13075 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
13076 && base_reg && !index_reg && !disp
13077 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
13078 disp = const0_rtx;
13080 /* Special case: encode reg+reg instead of reg*2. */
13081 if (!base && index && scale == 2)
13082 base = index, base_reg = index_reg, scale = 1;
13084 /* Special case: scaling cannot be encoded without base or displacement. */
13085 if (!base && !disp && index && scale != 1)
13086 disp = const0_rtx;
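  /* Examples of the canonicalizations above (illustrative assembly forms):
     (%ebp) becomes 0(%ebp), since %ebp/%r13 as a base needs a displacement;
     %eax*2 with no base becomes (%eax,%eax), i.e. base + index with scale 1;
     %eax*4 with no base and no displacement becomes 0(,%eax,4).  */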
13088 out->base = base;
13089 out->index = index;
13090 out->disp = disp;
13091 out->scale = scale;
13092 out->seg = seg;
13094 return retval;
13097 /* Return the cost of the memory address x.
13098 For i386, it is better to use a complex address than let gcc copy
13099 the address into a reg and make a new pseudo. But not if the address
13100 requires two regs - that would mean more pseudos with longer
13101 lifetimes. */
13102 static int
13103 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
13105 struct ix86_address parts;
13106 int cost = 1;
13107 int ok = ix86_decompose_address (x, &parts);
13109 gcc_assert (ok);
13111 if (parts.base && GET_CODE (parts.base) == SUBREG)
13112 parts.base = SUBREG_REG (parts.base);
13113 if (parts.index && GET_CODE (parts.index) == SUBREG)
13114 parts.index = SUBREG_REG (parts.index);
13116 /* Attempt to minimize the number of registers in the address by increasing
13117 the address cost for each used register. We don't increase the address
13118 cost for "pic_offset_table_rtx". When a memory operand that uses
13119 "pic_offset_table_rtx" is not invariant itself, it most likely means that
13120 the base or index is not invariant. Therefore only "pic_offset_table_rtx"
13121 could be hoisted out, which is not profitable for x86. */
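  /* Worked example (hypothetical operands): an address whose base and index
     are both still pseudo registers costs 1 + 1 + 1 = 3, while the same
     address built only from hard registers costs just the base value of 1,
     so addresses that tie up more pseudos look less attractive.  */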
13122 if (parts.base
13123 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
13124 && (current_pass->type == GIMPLE_PASS
13125 || !pic_offset_table_rtx
13126 || !REG_P (parts.base)
13127 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
13128 cost++;
13130 if (parts.index
13131 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
13132 && (current_pass->type == GIMPLE_PASS
13133 || !pic_offset_table_rtx
13134 || !REG_P (parts.index)
13135 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
13136 cost++;
13138 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
13139 since its predecode logic can't detect the length of instructions
13140 and they degenerate to vector decoding. Increase the cost of such
13141 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
13142 to split such addresses or even refuse such addresses at all.
13144 Following addressing modes are affected:
13145 [base+scale*index]
13146 [scale*index+disp]
13147 [base+index]
13149 The first and last case may be avoidable by explicitly coding the zero in
13150 the memory address, but I don't have an AMD-K6 machine handy to check this
13151 theory. */
13153 if (TARGET_K6
13154 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13155 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13156 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13157 cost += 10;
13159 return cost;
13162 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
13163 this is used to form addresses to local data when -fPIC is in
13164 use. */
13166 static bool
13167 darwin_local_data_pic (rtx disp)
13169 return (GET_CODE (disp) == UNSPEC
13170 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13173 /* Determine if a given RTX is a valid constant. We already know this
13174 satisfies CONSTANT_P. */
13176 static bool
13177 ix86_legitimate_constant_p (machine_mode, rtx x)
13179 /* Pointer bounds constants are not valid. */
13180 if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13181 return false;
13183 switch (GET_CODE (x))
13185 case CONST:
13186 x = XEXP (x, 0);
13188 if (GET_CODE (x) == PLUS)
13190 if (!CONST_INT_P (XEXP (x, 1)))
13191 return false;
13192 x = XEXP (x, 0);
13195 if (TARGET_MACHO && darwin_local_data_pic (x))
13196 return true;
13198 /* Only some unspecs are valid as "constants". */
13199 if (GET_CODE (x) == UNSPEC)
13200 switch (XINT (x, 1))
13202 case UNSPEC_GOT:
13203 case UNSPEC_GOTOFF:
13204 case UNSPEC_PLTOFF:
13205 return TARGET_64BIT;
13206 case UNSPEC_TPOFF:
13207 case UNSPEC_NTPOFF:
13208 x = XVECEXP (x, 0, 0);
13209 return (GET_CODE (x) == SYMBOL_REF
13210 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13211 case UNSPEC_DTPOFF:
13212 x = XVECEXP (x, 0, 0);
13213 return (GET_CODE (x) == SYMBOL_REF
13214 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13215 default:
13216 return false;
13219 /* We must have drilled down to a symbol. */
13220 if (GET_CODE (x) == LABEL_REF)
13221 return true;
13222 if (GET_CODE (x) != SYMBOL_REF)
13223 return false;
13224 /* FALLTHRU */
13226 case SYMBOL_REF:
13227 /* TLS symbols are never valid. */
13228 if (SYMBOL_REF_TLS_MODEL (x))
13229 return false;
13231 /* DLLIMPORT symbols are never valid. */
13232 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13233 && SYMBOL_REF_DLLIMPORT_P (x))
13234 return false;
13236 #if TARGET_MACHO
13237 /* mdynamic-no-pic */
13238 if (MACHO_DYNAMIC_NO_PIC_P)
13239 return machopic_symbol_defined_p (x);
13240 #endif
13241 break;
13243 case CONST_WIDE_INT:
13244 if (!TARGET_64BIT && !standard_sse_constant_p (x))
13245 return false;
13246 break;
13248 case CONST_VECTOR:
13249 if (!standard_sse_constant_p (x))
13250 return false;
13252 default:
13253 break;
13256 /* Otherwise we handle everything else in the move patterns. */
13257 return true;
13260 /* Determine if it's legal to put X into the constant pool. This
13261 is not possible for the address of thread-local symbols, which
13262 is checked above. */
13264 static bool
13265 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13267 /* We can always put integral constants and vectors in memory. */
13268 switch (GET_CODE (x))
13270 case CONST_INT:
13271 case CONST_WIDE_INT:
13272 case CONST_DOUBLE:
13273 case CONST_VECTOR:
13274 return false;
13276 default:
13277 break;
13279 return !ix86_legitimate_constant_p (mode, x);
13282 /* Return true if the symbol is marked as dllimport or as a stub variable,
13283 otherwise false. */
13285 static bool
13286 is_imported_p (rtx x)
13288 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13289 || GET_CODE (x) != SYMBOL_REF)
13290 return false;
13292 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13296 /* Nonzero if the constant value X is a legitimate general operand
13297 when generating PIC code. It is given that flag_pic is on and
13298 that X satisfies CONSTANT_P. */
13300 bool
13301 legitimate_pic_operand_p (rtx x)
13303 rtx inner;
13305 switch (GET_CODE (x))
13307 case CONST:
13308 inner = XEXP (x, 0);
13309 if (GET_CODE (inner) == PLUS
13310 && CONST_INT_P (XEXP (inner, 1)))
13311 inner = XEXP (inner, 0);
13313 /* Only some unspecs are valid as "constants". */
13314 if (GET_CODE (inner) == UNSPEC)
13315 switch (XINT (inner, 1))
13317 case UNSPEC_GOT:
13318 case UNSPEC_GOTOFF:
13319 case UNSPEC_PLTOFF:
13320 return TARGET_64BIT;
13321 case UNSPEC_TPOFF:
13322 x = XVECEXP (inner, 0, 0);
13323 return (GET_CODE (x) == SYMBOL_REF
13324 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13325 case UNSPEC_MACHOPIC_OFFSET:
13326 return legitimate_pic_address_disp_p (x);
13327 default:
13328 return false;
13330 /* FALLTHRU */
13332 case SYMBOL_REF:
13333 case LABEL_REF:
13334 return legitimate_pic_address_disp_p (x);
13336 default:
13337 return true;
13341 /* Determine if a given CONST RTX is a valid memory displacement
13342 in PIC mode. */
13344 bool
13345 legitimate_pic_address_disp_p (rtx disp)
13347 bool saw_plus;
13349 /* In 64bit mode we can allow direct addresses of symbols and labels
13350 when they are not dynamic symbols. */
13351 if (TARGET_64BIT)
13353 rtx op0 = disp, op1;
13355 switch (GET_CODE (disp))
13357 case LABEL_REF:
13358 return true;
13360 case CONST:
13361 if (GET_CODE (XEXP (disp, 0)) != PLUS)
13362 break;
13363 op0 = XEXP (XEXP (disp, 0), 0);
13364 op1 = XEXP (XEXP (disp, 0), 1);
13365 if (!CONST_INT_P (op1)
13366 || INTVAL (op1) >= 16*1024*1024
13367 || INTVAL (op1) < -16*1024*1024)
13368 break;
13369 if (GET_CODE (op0) == LABEL_REF)
13370 return true;
13371 if (GET_CODE (op0) == CONST
13372 && GET_CODE (XEXP (op0, 0)) == UNSPEC
13373 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13374 return true;
13375 if (GET_CODE (op0) == UNSPEC
13376 && XINT (op0, 1) == UNSPEC_PCREL)
13377 return true;
13378 if (GET_CODE (op0) != SYMBOL_REF)
13379 break;
13380 /* FALLTHRU */
13382 case SYMBOL_REF:
13383 /* TLS references should always be enclosed in UNSPEC.
13384 A dllimported symbol always needs to be resolved. */
13385 if (SYMBOL_REF_TLS_MODEL (op0)
13386 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13387 return false;
13389 if (TARGET_PECOFF)
13391 if (is_imported_p (op0))
13392 return true;
13394 if (SYMBOL_REF_FAR_ADDR_P (op0)
13395 || !SYMBOL_REF_LOCAL_P (op0))
13396 break;
13398 /* Function symbols need to be resolved only for
13399 the large model.
13400 For the small model we don't need to resolve anything
13401 here. */
13402 if ((ix86_cmodel != CM_LARGE_PIC
13403 && SYMBOL_REF_FUNCTION_P (op0))
13404 || ix86_cmodel == CM_SMALL_PIC)
13405 return true;
13406 /* Non-external symbols don't need to be resolved for
13407 the large and medium models. */
13408 if ((ix86_cmodel == CM_LARGE_PIC
13409 || ix86_cmodel == CM_MEDIUM_PIC)
13410 && !SYMBOL_REF_EXTERNAL_P (op0))
13411 return true;
13413 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13414 && (SYMBOL_REF_LOCAL_P (op0)
13415 || (HAVE_LD_PIE_COPYRELOC
13416 && flag_pie
13417 && !SYMBOL_REF_WEAK (op0)
13418 && !SYMBOL_REF_FUNCTION_P (op0)))
13419 && ix86_cmodel != CM_LARGE_PIC)
13420 return true;
13421 break;
13423 default:
13424 break;
13427 if (GET_CODE (disp) != CONST)
13428 return false;
13429 disp = XEXP (disp, 0);
13431 if (TARGET_64BIT)
13433 /* It is not safe to allow PLUS expressions; this limits the allowed
13434 distance of GOT references. We should not need these anyway. */
13435 if (GET_CODE (disp) != UNSPEC
13436 || (XINT (disp, 1) != UNSPEC_GOTPCREL
13437 && XINT (disp, 1) != UNSPEC_GOTOFF
13438 && XINT (disp, 1) != UNSPEC_PCREL
13439 && XINT (disp, 1) != UNSPEC_PLTOFF))
13440 return false;
13442 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13443 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13444 return false;
13445 return true;
13448 saw_plus = false;
13449 if (GET_CODE (disp) == PLUS)
13451 if (!CONST_INT_P (XEXP (disp, 1)))
13452 return false;
13453 disp = XEXP (disp, 0);
13454 saw_plus = true;
13457 if (TARGET_MACHO && darwin_local_data_pic (disp))
13458 return true;
13460 if (GET_CODE (disp) != UNSPEC)
13461 return false;
13463 switch (XINT (disp, 1))
13465 case UNSPEC_GOT:
13466 if (saw_plus)
13467 return false;
13468 /* We need to check for both symbols and labels because VxWorks loads
13469 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
13470 details. */
13471 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13472 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13473 case UNSPEC_GOTOFF:
13474 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
13475 While the ABI also specifies a 32bit relocation, we don't produce
13476 it in the small PIC model at all. */
13477 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13478 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13479 && !TARGET_64BIT)
13480 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13481 return false;
13482 case UNSPEC_GOTTPOFF:
13483 case UNSPEC_GOTNTPOFF:
13484 case UNSPEC_INDNTPOFF:
13485 if (saw_plus)
13486 return false;
13487 disp = XVECEXP (disp, 0, 0);
13488 return (GET_CODE (disp) == SYMBOL_REF
13489 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13490 case UNSPEC_NTPOFF:
13491 disp = XVECEXP (disp, 0, 0);
13492 return (GET_CODE (disp) == SYMBOL_REF
13493 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13494 case UNSPEC_DTPOFF:
13495 disp = XVECEXP (disp, 0, 0);
13496 return (GET_CODE (disp) == SYMBOL_REF
13497 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13500 return false;
13503 /* Determine if op is a suitable RTX for an address register.
13504 Return the naked register if a register or a register subreg is
13505 found, otherwise return NULL_RTX. */
13507 static rtx
13508 ix86_validate_address_register (rtx op)
13510 machine_mode mode = GET_MODE (op);
13512 /* Only SImode or DImode registers can form the address. */
13513 if (mode != SImode && mode != DImode)
13514 return NULL_RTX;
13516 if (REG_P (op))
13517 return op;
13518 else if (GET_CODE (op) == SUBREG)
13520 rtx reg = SUBREG_REG (op);
13522 if (!REG_P (reg))
13523 return NULL_RTX;
13525 mode = GET_MODE (reg);
13527 /* Don't allow SUBREGs that span more than a word. It can
13528 lead to spill failures when the register is one word out
13529 of a two word structure. */
13530 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13531 return NULL_RTX;
13533 /* Allow only SUBREGs of non-eliminable hard registers. */
13534 if (register_no_elim_operand (reg, mode))
13535 return reg;
13538 /* Op is not a register. */
13539 return NULL_RTX;
13542 /* Recognizes RTL expressions that are valid memory addresses for an
13543 instruction. The MODE argument is the machine mode for the MEM
13544 expression that wants to use this address.
13546 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
13547 convert common non-canonical forms to canonical form so that they will
13548 be recognized. */
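/* For illustration, the canonical form accepted here is

     base + index * scale + disp

   with scale in {1, 2, 4, 8}, e.g. the operand "16(%rbx,%rcx,8)"; a scale
   factor such as 3, or a scale without an index register, is rejected by
   the checks below.  */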
13550 static bool
13551 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13553 struct ix86_address parts;
13554 rtx base, index, disp;
13555 HOST_WIDE_INT scale;
13556 enum ix86_address_seg seg;
13558 if (ix86_decompose_address (addr, &parts) <= 0)
13559 /* Decomposition failed. */
13560 return false;
13562 base = parts.base;
13563 index = parts.index;
13564 disp = parts.disp;
13565 scale = parts.scale;
13566 seg = parts.seg;
13568 /* Validate base register. */
13569 if (base)
13571 rtx reg = ix86_validate_address_register (base);
13573 if (reg == NULL_RTX)
13574 return false;
13576 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13577 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13578 /* Base is not valid. */
13579 return false;
13582 /* Validate index register. */
13583 if (index)
13585 rtx reg = ix86_validate_address_register (index);
13587 if (reg == NULL_RTX)
13588 return false;
13590 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13591 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13592 /* Index is not valid. */
13593 return false;
13596 /* Index and base should have the same mode. */
13597 if (base && index
13598 && GET_MODE (base) != GET_MODE (index))
13599 return false;
13601 /* Address override works only on the (%reg) part of %fs:(%reg). */
13602 if (seg != SEG_DEFAULT
13603 && ((base && GET_MODE (base) != word_mode)
13604 || (index && GET_MODE (index) != word_mode)))
13605 return false;
13607 /* Validate scale factor. */
13608 if (scale != 1)
13610 if (!index)
13611 /* Scale without index. */
13612 return false;
13614 if (scale != 2 && scale != 4 && scale != 8)
13615 /* Scale is not a valid multiplier. */
13616 return false;
13619 /* Validate displacement. */
13620 if (disp)
13622 if (GET_CODE (disp) == CONST
13623 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13624 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13625 switch (XINT (XEXP (disp, 0), 1))
13627 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13628 used. While the ABI also specifies 32bit relocations, we don't produce
13629 them at all and use IP-relative addressing instead. */
13630 case UNSPEC_GOT:
13631 case UNSPEC_GOTOFF:
13632 gcc_assert (flag_pic);
13633 if (!TARGET_64BIT)
13634 goto is_legitimate_pic;
13636 /* 64bit address unspec. */
13637 return false;
13639 case UNSPEC_GOTPCREL:
13640 case UNSPEC_PCREL:
13641 gcc_assert (flag_pic);
13642 goto is_legitimate_pic;
13644 case UNSPEC_GOTTPOFF:
13645 case UNSPEC_GOTNTPOFF:
13646 case UNSPEC_INDNTPOFF:
13647 case UNSPEC_NTPOFF:
13648 case UNSPEC_DTPOFF:
13649 break;
13651 case UNSPEC_STACK_CHECK:
13652 gcc_assert (flag_split_stack);
13653 break;
13655 default:
13656 /* Invalid address unspec. */
13657 return false;
13660 else if (SYMBOLIC_CONST (disp)
13661 && (flag_pic
13662 || (TARGET_MACHO
13663 #if TARGET_MACHO
13664 && MACHOPIC_INDIRECT
13665 && !machopic_operand_p (disp)
13666 #endif
13670 is_legitimate_pic:
13671 if (TARGET_64BIT && (index || base))
13673 /* foo@dtpoff(%rX) is ok. */
13674 if (GET_CODE (disp) != CONST
13675 || GET_CODE (XEXP (disp, 0)) != PLUS
13676 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13677 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13678 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13679 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13680 /* Non-constant pic memory reference. */
13681 return false;
13683 else if ((!TARGET_MACHO || flag_pic)
13684 && ! legitimate_pic_address_disp_p (disp))
13685 /* Displacement is an invalid pic construct. */
13686 return false;
13687 #if TARGET_MACHO
13688 else if (MACHO_DYNAMIC_NO_PIC_P
13689 && !ix86_legitimate_constant_p (Pmode, disp))
13690 /* The displacement must be referenced via a non_lazy_pointer. */
13691 return false;
13692 #endif
13694 /* This code used to verify that a symbolic pic displacement
13695 includes the pic_offset_table_rtx register.
13697 While this is a good idea, unfortunately these constructs may
13698 be created by "adds using lea" optimization for incorrect
13699 code like:
13701 int a;
13702 int foo(int i)
13704 return *(&a+i);
13707 This code is nonsensical, but results in addressing the
13708 GOT table with a pic_offset_table_rtx base. We can't
13709 just refuse it easily, since it gets matched by the
13710 "addsi3" pattern, which later gets split to lea in the
13711 case the output register differs from the input. While this
13712 could be handled by a separate addsi pattern for this case
13713 that never results in lea, disabling this test seems to be
13714 the easier and correct fix for the crash. */
13716 else if (GET_CODE (disp) != LABEL_REF
13717 && !CONST_INT_P (disp)
13718 && (GET_CODE (disp) != CONST
13719 || !ix86_legitimate_constant_p (Pmode, disp))
13720 && (GET_CODE (disp) != SYMBOL_REF
13721 || !ix86_legitimate_constant_p (Pmode, disp)))
13722 /* Displacement is not constant. */
13723 return false;
13724 else if (TARGET_64BIT
13725 && !x86_64_immediate_operand (disp, VOIDmode))
13726 /* Displacement is out of range. */
13727 return false;
13728 /* In x32 mode, constant addresses are sign extended to 64bit, so
13729 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13730 else if (TARGET_X32 && !(index || base)
13731 && CONST_INT_P (disp)
13732 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13733 return false;
13736 /* Everything looks valid. */
13737 return true;
13740 /* Determine if a given RTX is a valid constant address. */
13742 bool
13743 constant_address_p (rtx x)
13745 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13748 /* Return a unique alias set for the GOT. */
13750 static alias_set_type
13751 ix86_GOT_alias_set (void)
13753 static alias_set_type set = -1;
13754 if (set == -1)
13755 set = new_alias_set ();
13756 return set;
13759 /* Return a legitimate reference for ORIG (an address) using the
13760 register REG. If REG is 0, a new pseudo is generated.
13762 There are two types of references that must be handled:
13764 1. Global data references must load the address from the GOT, via
13765 the PIC reg. An insn is emitted to do this load, and the reg is
13766 returned.
13768 2. Static data references, constant pool addresses, and code labels
13769 compute the address as an offset from the GOT, whose base is in
13770 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13771 differentiate them from global data objects. The returned
13772 address is the PIC reg + an unspec constant.
13774 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13775 reg also appears in the address. */
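/* Illustrative result forms for 32-bit code (schematic, with %ebx standing
   for pic_offset_table_rtx): a global data reference becomes a load such as

     (mem (plus %ebx sym@GOT))		-- address fetched from the GOT

   while a local/static reference becomes

     (plus %ebx sym@GOTOFF)		-- PIC register plus link-time offset

   matching cases 1 and 2 above.  */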
13777 static rtx
13778 legitimize_pic_address (rtx orig, rtx reg)
13780 rtx addr = orig;
13781 rtx new_rtx = orig;
13783 #if TARGET_MACHO
13784 if (TARGET_MACHO && !TARGET_64BIT)
13786 if (reg == 0)
13787 reg = gen_reg_rtx (Pmode);
13788 /* Use the generic Mach-O PIC machinery. */
13789 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13791 #endif
13793 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13795 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13796 if (tmp)
13797 return tmp;
13800 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13801 new_rtx = addr;
13802 else if (TARGET_64BIT && !TARGET_PECOFF
13803 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13805 rtx tmpreg;
13806 /* This symbol may be referenced via a displacement from the PIC
13807 base address (@GOTOFF). */
13809 if (GET_CODE (addr) == CONST)
13810 addr = XEXP (addr, 0);
13811 if (GET_CODE (addr) == PLUS)
13813 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13814 UNSPEC_GOTOFF);
13815 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13817 else
13818 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13819 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13820 if (!reg)
13821 tmpreg = gen_reg_rtx (Pmode);
13822 else
13823 tmpreg = reg;
13824 emit_move_insn (tmpreg, new_rtx);
13826 if (reg != 0)
13828 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13829 tmpreg, 1, OPTAB_DIRECT);
13830 new_rtx = reg;
13832 else
13833 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13835 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13837 /* This symbol may be referenced via a displacement from the PIC
13838 base address (@GOTOFF). */
13840 if (GET_CODE (addr) == CONST)
13841 addr = XEXP (addr, 0);
13842 if (GET_CODE (addr) == PLUS)
13844 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13845 UNSPEC_GOTOFF);
13846 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13848 else
13849 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13850 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13851 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13853 if (reg != 0)
13855 emit_move_insn (reg, new_rtx);
13856 new_rtx = reg;
13859 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13860 /* We can't use @GOTOFF for text labels on VxWorks;
13861 see gotoff_operand. */
13862 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13864 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13865 if (tmp)
13866 return tmp;
13868 /* For x64 PE-COFF there is no GOT table, so we use the address
13869 directly. */
13870 if (TARGET_64BIT && TARGET_PECOFF)
13872 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13873 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13875 if (reg == 0)
13876 reg = gen_reg_rtx (Pmode);
13877 emit_move_insn (reg, new_rtx);
13878 new_rtx = reg;
13880 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13882 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13883 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13884 new_rtx = gen_const_mem (Pmode, new_rtx);
13885 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13887 if (reg == 0)
13888 reg = gen_reg_rtx (Pmode);
13889 /* Use gen_movsi directly, otherwise the address is loaded
13890 into a register for CSE. We don't want to CSE these addresses;
13891 instead we CSE addresses from the GOT table, so skip this. */
13892 emit_insn (gen_movsi (reg, new_rtx));
13893 new_rtx = reg;
13895 else
13897 /* This symbol must be referenced via a load from the
13898 Global Offset Table (@GOT). */
13900 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13901 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13902 if (TARGET_64BIT)
13903 new_rtx = force_reg (Pmode, new_rtx);
13904 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13905 new_rtx = gen_const_mem (Pmode, new_rtx);
13906 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13908 if (reg == 0)
13909 reg = gen_reg_rtx (Pmode);
13910 emit_move_insn (reg, new_rtx);
13911 new_rtx = reg;
13914 else
13916 if (CONST_INT_P (addr)
13917 && !x86_64_immediate_operand (addr, VOIDmode))
13919 if (reg)
13921 emit_move_insn (reg, addr);
13922 new_rtx = reg;
13924 else
13925 new_rtx = force_reg (Pmode, addr);
13927 else if (GET_CODE (addr) == CONST)
13929 addr = XEXP (addr, 0);
13931 /* We must match stuff we generate before. Assume the only
13932 unspecs that can get here are ours. Not that we could do
13933 anything with them anyway.... */
13934 if (GET_CODE (addr) == UNSPEC
13935 || (GET_CODE (addr) == PLUS
13936 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13937 return orig;
13938 gcc_assert (GET_CODE (addr) == PLUS);
13940 if (GET_CODE (addr) == PLUS)
13942 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13944 /* Check first to see if this is a constant offset from a @GOTOFF
13945 symbol reference. */
13946 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13947 && CONST_INT_P (op1))
13949 if (!TARGET_64BIT)
13951 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13952 UNSPEC_GOTOFF);
13953 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13954 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13955 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13957 if (reg != 0)
13959 emit_move_insn (reg, new_rtx);
13960 new_rtx = reg;
13963 else
13965 if (INTVAL (op1) < -16*1024*1024
13966 || INTVAL (op1) >= 16*1024*1024)
13968 if (!x86_64_immediate_operand (op1, Pmode))
13969 op1 = force_reg (Pmode, op1);
13970 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13974 else
13976 rtx base = legitimize_pic_address (op0, reg);
13977 machine_mode mode = GET_MODE (base);
13978 new_rtx
13979 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13981 if (CONST_INT_P (new_rtx))
13983 if (INTVAL (new_rtx) < -16*1024*1024
13984 || INTVAL (new_rtx) >= 16*1024*1024)
13986 if (!x86_64_immediate_operand (new_rtx, mode))
13987 new_rtx = force_reg (mode, new_rtx);
13988 new_rtx
13989 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13991 else
13992 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13994 else
13996 /* For %rip addressing, we have to use just disp32, with
13997 neither base nor index. */
13998 if (TARGET_64BIT
13999 && (GET_CODE (base) == SYMBOL_REF
14000 || GET_CODE (base) == LABEL_REF))
14001 base = force_reg (mode, base);
14002 if (GET_CODE (new_rtx) == PLUS
14003 && CONSTANT_P (XEXP (new_rtx, 1)))
14005 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
14006 new_rtx = XEXP (new_rtx, 1);
14008 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
14013 return new_rtx;
14016 /* Load the thread pointer. If TO_REG is true, force it into a register. */
14018 static rtx
14019 get_thread_pointer (machine_mode tp_mode, bool to_reg)
14021 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
14023 if (GET_MODE (tp) != tp_mode)
14025 gcc_assert (GET_MODE (tp) == SImode);
14026 gcc_assert (tp_mode == DImode);
14028 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
14031 if (to_reg)
14032 tp = copy_to_mode_reg (tp_mode, tp);
14034 return tp;
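/* Note, for illustration: the UNSPEC_TP above ends up referencing the
   thread-pointer segment register, conventionally %fs on x86-64 and %gs
   on 32-bit GNU/Linux, so e.g. "%fs:0" reads the TCB's own address.  */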
14037 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14039 static GTY(()) rtx ix86_tls_symbol;
14041 static rtx
14042 ix86_tls_get_addr (void)
14044 if (!ix86_tls_symbol)
14046 const char *sym
14047 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
14048 ? "___tls_get_addr" : "__tls_get_addr");
14050 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
14053 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
14055 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
14056 UNSPEC_PLTOFF);
14057 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
14058 gen_rtx_CONST (Pmode, unspec));
14061 return ix86_tls_symbol;
14064 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14066 static GTY(()) rtx ix86_tls_module_base_symbol;
14069 ix86_tls_module_base (void)
14071 if (!ix86_tls_module_base_symbol)
14073 ix86_tls_module_base_symbol
14074 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
14076 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14077 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14080 return ix86_tls_module_base_symbol;
14083 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
14084 false if we expect this to be used for a memory address and true if
14085 we expect to load the address into a register. */
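/* Rough sketch of what the models below expand to (illustrative only; the
   exact sequences depend on target flags such as TARGET_GNU2_TLS):
     global/local dynamic -> a call to the tls_get_addr helper (see
	ix86_tls_get_addr above), or the GNU2 descriptor sequence;
     initial exec -> load the symbol's TP offset from the GOT and add the
	thread pointer;
     local exec -> add a link-time constant offset to the thread pointer.  */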
14087 static rtx
14088 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
14090 rtx dest, base, off;
14091 rtx pic = NULL_RTX, tp = NULL_RTX;
14092 machine_mode tp_mode = Pmode;
14093 int type;
14095 /* Fall back to the global dynamic model if the toolchain cannot support
14096 local dynamic. */
14097 if (TARGET_SUN_TLS && !TARGET_64BIT
14098 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
14099 && model == TLS_MODEL_LOCAL_DYNAMIC)
14100 model = TLS_MODEL_GLOBAL_DYNAMIC;
14102 switch (model)
14104 case TLS_MODEL_GLOBAL_DYNAMIC:
14105 dest = gen_reg_rtx (Pmode);
14107 if (!TARGET_64BIT)
14109 if (flag_pic && !TARGET_PECOFF)
14110 pic = pic_offset_table_rtx;
14111 else
14113 pic = gen_reg_rtx (Pmode);
14114 emit_insn (gen_set_got (pic));
14118 if (TARGET_GNU2_TLS)
14120 if (TARGET_64BIT)
14121 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14122 else
14123 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14125 tp = get_thread_pointer (Pmode, true);
14126 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14128 if (GET_MODE (x) != Pmode)
14129 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14131 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14133 else
14135 rtx caddr = ix86_tls_get_addr ();
14137 if (TARGET_64BIT)
14139 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14140 rtx_insn *insns;
14142 start_sequence ();
14143 emit_call_insn
14144 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14145 insns = get_insns ();
14146 end_sequence ();
14148 if (GET_MODE (x) != Pmode)
14149 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14151 RTL_CONST_CALL_P (insns) = 1;
14152 emit_libcall_block (insns, dest, rax, x);
14154 else
14155 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14157 break;
14159 case TLS_MODEL_LOCAL_DYNAMIC:
14160 base = gen_reg_rtx (Pmode);
14162 if (!TARGET_64BIT)
14164 if (flag_pic)
14165 pic = pic_offset_table_rtx;
14166 else
14168 pic = gen_reg_rtx (Pmode);
14169 emit_insn (gen_set_got (pic));
14173 if (TARGET_GNU2_TLS)
14175 rtx tmp = ix86_tls_module_base ();
14177 if (TARGET_64BIT)
14178 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14179 else
14180 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14182 tp = get_thread_pointer (Pmode, true);
14183 set_unique_reg_note (get_last_insn (), REG_EQUAL,
14184 gen_rtx_MINUS (Pmode, tmp, tp));
14186 else
14188 rtx caddr = ix86_tls_get_addr ();
14190 if (TARGET_64BIT)
14192 rtx rax = gen_rtx_REG (Pmode, AX_REG);
14193 rtx_insn *insns;
14194 rtx eqv;
14196 start_sequence ();
14197 emit_call_insn
14198 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14199 insns = get_insns ();
14200 end_sequence ();
14202 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14203 share the LD_BASE result with other LD model accesses. */
14204 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14205 UNSPEC_TLS_LD_BASE);
14207 RTL_CONST_CALL_P (insns) = 1;
14208 emit_libcall_block (insns, base, rax, eqv);
14210 else
14211 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14214 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14215 off = gen_rtx_CONST (Pmode, off);
14217 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14219 if (TARGET_GNU2_TLS)
14221 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14223 if (GET_MODE (x) != Pmode)
14224 x = gen_rtx_ZERO_EXTEND (Pmode, x);
14226 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14228 break;
14230 case TLS_MODEL_INITIAL_EXEC:
14231 if (TARGET_64BIT)
14233 if (TARGET_SUN_TLS && !TARGET_X32)
14235 /* The Sun linker took the AMD64 TLS spec literally
14236 and can only handle %rax as destination of the
14237 initial executable code sequence. */
14239 dest = gen_reg_rtx (DImode);
14240 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14241 return dest;
14244 /* Generate DImode references to avoid %fs:(%reg32)
14245 problems and a linker IE->LE relaxation bug. */
14246 tp_mode = DImode;
14247 pic = NULL;
14248 type = UNSPEC_GOTNTPOFF;
14250 else if (flag_pic)
14252 pic = pic_offset_table_rtx;
14253 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14255 else if (!TARGET_ANY_GNU_TLS)
14257 pic = gen_reg_rtx (Pmode);
14258 emit_insn (gen_set_got (pic));
14259 type = UNSPEC_GOTTPOFF;
14261 else
14263 pic = NULL;
14264 type = UNSPEC_INDNTPOFF;
14267 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14268 off = gen_rtx_CONST (tp_mode, off);
14269 if (pic)
14270 off = gen_rtx_PLUS (tp_mode, pic, off);
14271 off = gen_const_mem (tp_mode, off);
14272 set_mem_alias_set (off, ix86_GOT_alias_set ());
14274 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14276 base = get_thread_pointer (tp_mode,
14277 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14278 off = force_reg (tp_mode, off);
14279 return gen_rtx_PLUS (tp_mode, base, off);
14281 else
14283 base = get_thread_pointer (Pmode, true);
14284 dest = gen_reg_rtx (Pmode);
14285 emit_insn (ix86_gen_sub3 (dest, base, off));
14287 break;
14289 case TLS_MODEL_LOCAL_EXEC:
14290 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14291 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14292 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14293 off = gen_rtx_CONST (Pmode, off);
14295 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14297 base = get_thread_pointer (Pmode,
14298 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14299 return gen_rtx_PLUS (Pmode, base, off);
14301 else
14303 base = get_thread_pointer (Pmode, true);
14304 dest = gen_reg_rtx (Pmode);
14305 emit_insn (ix86_gen_sub3 (dest, base, off));
14307 break;
14309 default:
14310 gcc_unreachable ();
14313 return dest;
14316 /* Create or return the unique __imp_DECL dllimport symbol corresponding
14317 to symbol DECL if BEIMPORT is true. Otherwise create or return the
14318 unique refptr-DECL symbol corresponding to symbol DECL. */
14320 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
14322 static inline hashval_t hash (tree_map *m) { return m->hash; }
14323 static inline bool
14324 equal (tree_map *a, tree_map *b)
14326 return a->base.from == b->base.from;
14329 static int
14330 keep_cache_entry (tree_map *&m)
14332 return ggc_marked_p (m->base.from);
14336 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14338 static tree
14339 get_dllimport_decl (tree decl, bool beimport)
14341 struct tree_map *h, in;
14342 const char *name;
14343 const char *prefix;
14344 size_t namelen, prefixlen;
14345 char *imp_name;
14346 tree to;
14347 rtx rtl;
14349 if (!dllimport_map)
14350 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14352 in.hash = htab_hash_pointer (decl);
14353 in.base.from = decl;
14354 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14355 h = *loc;
14356 if (h)
14357 return h->to;
14359 *loc = h = ggc_alloc<tree_map> ();
14360 h->hash = in.hash;
14361 h->base.from = decl;
14362 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14363 VAR_DECL, NULL, ptr_type_node);
14364 DECL_ARTIFICIAL (to) = 1;
14365 DECL_IGNORED_P (to) = 1;
14366 DECL_EXTERNAL (to) = 1;
14367 TREE_READONLY (to) = 1;
14369 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14370 name = targetm.strip_name_encoding (name);
14371 if (beimport)
14372 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14373 ? "*__imp_" : "*__imp__";
14374 else
14375 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14376 namelen = strlen (name);
14377 prefixlen = strlen (prefix);
14378 imp_name = (char *) alloca (namelen + prefixlen + 1);
14379 memcpy (imp_name, prefix, prefixlen);
14380 memcpy (imp_name + prefixlen, name, namelen + 1);
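/* For example, assuming an empty user_label_prefix: a dllimport
   reference to "foo" yields the stub symbol "*__imp_foo", and a refptr
   reference yields "*.refptr.foo"; the leading '*' makes the name be
   emitted verbatim, with no further prefixing. */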
14382 name = ggc_alloc_string (imp_name, namelen + prefixlen);
14383 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14384 SET_SYMBOL_REF_DECL (rtl, to);
14385 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14386 if (!beimport)
14388 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14389 #ifdef SUB_TARGET_RECORD_STUB
14390 SUB_TARGET_RECORD_STUB (name);
14391 #endif
14394 rtl = gen_const_mem (Pmode, rtl);
14395 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14397 SET_DECL_RTL (to, rtl);
14398 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14400 return to;
14403 /* Expand SYMBOL into its corresponding far-address symbol.
14404 WANT_REG is true if we require the result be a register. */
14406 static rtx
14407 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14409 tree imp_decl;
14410 rtx x;
14412 gcc_assert (SYMBOL_REF_DECL (symbol));
14413 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14415 x = DECL_RTL (imp_decl);
14416 if (want_reg)
14417 x = force_reg (Pmode, x);
14418 return x;
14421 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
14422 true if we require the result be a register. */
14424 static rtx
14425 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14427 tree imp_decl;
14428 rtx x;
14430 gcc_assert (SYMBOL_REF_DECL (symbol));
14431 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14433 x = DECL_RTL (imp_decl);
14434 if (want_reg)
14435 x = force_reg (Pmode, x);
14436 return x;
14439 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
14440 is true if we require the result be a register. */
14442 static rtx
14443 legitimize_pe_coff_symbol (rtx addr, bool inreg)
14445 if (!TARGET_PECOFF)
14446 return NULL_RTX;
14448 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14450 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14451 return legitimize_dllimport_symbol (addr, inreg);
14452 if (GET_CODE (addr) == CONST
14453 && GET_CODE (XEXP (addr, 0)) == PLUS
14454 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14455 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14457 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14458 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14462 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14463 return NULL_RTX;
14464 if (GET_CODE (addr) == SYMBOL_REF
14465 && !is_imported_p (addr)
14466 && SYMBOL_REF_EXTERNAL_P (addr)
14467 && SYMBOL_REF_DECL (addr))
14468 return legitimize_pe_coff_extern_decl (addr, inreg);
14470 if (GET_CODE (addr) == CONST
14471 && GET_CODE (XEXP (addr, 0)) == PLUS
14472 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14473 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14474 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14475 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14477 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14478 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14480 return NULL_RTX;
14483 /* Try machine-dependent ways of modifying an illegitimate address
14484 to be legitimate. If we find one, return the new, valid address.
14485 This macro is used in only one place: `memory_address' in explow.c.
14487 OLDX is the address as it was before break_out_memory_refs was called.
14488 In some cases it is useful to look at this to decide what needs to be done.
14490 It is always safe for this macro to do nothing. It exists to recognize
14491 opportunities to optimize the output.
14493 For the 80386, we handle X+REG by loading X into a register R and
14494 using R+REG. R will go in a general reg and indexing will be used.
14495 However, if REG is a broken-out memory address or multiplication,
14496 nothing needs to be done because REG can certainly go in a general reg.
14498 When -fpic is used, special handling is needed for symbolic references.
14499 See comments by legitimize_pic_address in i386.c for details. */
14501 static rtx
14502 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14504 bool changed = false;
14505 unsigned log;
14507 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14508 if (log)
14509 return legitimize_tls_address (x, (enum tls_model) log, false);
14510 if (GET_CODE (x) == CONST
14511 && GET_CODE (XEXP (x, 0)) == PLUS
14512 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14513 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14515 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14516 (enum tls_model) log, false);
14517 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14520 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14522 rtx tmp = legitimize_pe_coff_symbol (x, true);
14523 if (tmp)
14524 return tmp;
14527 if (flag_pic && SYMBOLIC_CONST (x))
14528 return legitimize_pic_address (x, 0);
14530 #if TARGET_MACHO
14531 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14532 return machopic_indirect_data_reference (x, 0);
14533 #endif
14535 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
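/* E.g. an address term (ashift REG 3) becomes (mult REG 8), which is
   the scaled-index form the address recognizer accepts. */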
14536 if (GET_CODE (x) == ASHIFT
14537 && CONST_INT_P (XEXP (x, 1))
14538 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14540 changed = true;
14541 log = INTVAL (XEXP (x, 1));
14542 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14543 GEN_INT (1 << log));
14546 if (GET_CODE (x) == PLUS)
14548 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
14550 if (GET_CODE (XEXP (x, 0)) == ASHIFT
14551 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14552 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14554 changed = true;
14555 log = INTVAL (XEXP (XEXP (x, 0), 1));
14556 XEXP (x, 0) = gen_rtx_MULT (Pmode,
14557 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14558 GEN_INT (1 << log));
14561 if (GET_CODE (XEXP (x, 1)) == ASHIFT
14562 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14563 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14565 changed = true;
14566 log = INTVAL (XEXP (XEXP (x, 1), 1));
14567 XEXP (x, 1) = gen_rtx_MULT (Pmode,
14568 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14569 GEN_INT (1 << log));
14572 /* Put multiply first if it isn't already. */
14573 if (GET_CODE (XEXP (x, 1)) == MULT)
14575 std::swap (XEXP (x, 0), XEXP (x, 1));
14576 changed = true;
14579 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14580 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
14581 created by virtual register instantiation, register elimination, and
14582 similar optimizations. */
14583 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14585 changed = true;
14586 x = gen_rtx_PLUS (Pmode,
14587 gen_rtx_PLUS (Pmode, XEXP (x, 0),
14588 XEXP (XEXP (x, 1), 0)),
14589 XEXP (XEXP (x, 1), 1));
14592 /* Canonicalize
14593 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14594 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
14595 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14596 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14597 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14598 && CONSTANT_P (XEXP (x, 1)))
14600 rtx constant;
14601 rtx other = NULL_RTX;
14603 if (CONST_INT_P (XEXP (x, 1)))
14605 constant = XEXP (x, 1);
14606 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14608 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14610 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14611 other = XEXP (x, 1);
14613 else
14614 constant = 0;
14616 if (constant)
14618 changed = true;
14619 x = gen_rtx_PLUS (Pmode,
14620 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14621 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14622 plus_constant (Pmode, other,
14623 INTVAL (constant)));
14627 if (changed && ix86_legitimate_address_p (mode, x, false))
14628 return x;
14630 if (GET_CODE (XEXP (x, 0)) == MULT)
14632 changed = true;
14633 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14636 if (GET_CODE (XEXP (x, 1)) == MULT)
14638 changed = true;
14639 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14642 if (changed
14643 && REG_P (XEXP (x, 1))
14644 && REG_P (XEXP (x, 0)))
14645 return x;
14647 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14649 changed = true;
14650 x = legitimize_pic_address (x, 0);
14653 if (changed && ix86_legitimate_address_p (mode, x, false))
14654 return x;
14656 if (REG_P (XEXP (x, 0)))
14658 rtx temp = gen_reg_rtx (Pmode);
14659 rtx val = force_operand (XEXP (x, 1), temp);
14660 if (val != temp)
14662 val = convert_to_mode (Pmode, val, 1);
14663 emit_move_insn (temp, val);
14666 XEXP (x, 1) = temp;
14667 return x;
14670 else if (REG_P (XEXP (x, 1)))
14672 rtx temp = gen_reg_rtx (Pmode);
14673 rtx val = force_operand (XEXP (x, 0), temp);
14674 if (val != temp)
14676 val = convert_to_mode (Pmode, val, 1);
14677 emit_move_insn (temp, val);
14680 XEXP (x, 0) = temp;
14681 return x;
14685 return x;
14688 /* Print an integer constant expression in assembler syntax. Addition
14689 and subtraction are the only arithmetic that may appear in these
14690 expressions. FILE is the stdio stream to write to, X is the rtx, and
14691 CODE is the operand print code from the output string. */
14693 static void
14694 output_pic_addr_const (FILE *file, rtx x, int code)
14696 char buf[256];
14698 switch (GET_CODE (x))
14700 case PC:
14701 gcc_assert (flag_pic);
14702 putc ('.', file);
14703 break;
14705 case SYMBOL_REF:
14706 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14707 output_addr_const (file, x);
14708 else
14710 const char *name = XSTR (x, 0);
14712 /* Mark the decl as referenced so that cgraph will
14713 output the function. */
14714 if (SYMBOL_REF_DECL (x))
14715 mark_decl_referenced (SYMBOL_REF_DECL (x));
14717 #if TARGET_MACHO
14718 if (MACHOPIC_INDIRECT
14719 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14720 name = machopic_indirection_name (x, /*stub_p=*/true);
14721 #endif
14722 assemble_name (file, name);
14724 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14725 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14726 fputs ("@PLT", file);
14727 break;
14729 case LABEL_REF:
14730 x = XEXP (x, 0);
14731 /* FALLTHRU */
14732 case CODE_LABEL:
14733 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14734 assemble_name (asm_out_file, buf);
14735 break;
14737 case CONST_INT:
14738 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14739 break;
14741 case CONST:
14742 /* This used to output parentheses around the expression,
14743 but that does not work on the 386 (either ATT or BSD assembler). */
14744 output_pic_addr_const (file, XEXP (x, 0), code);
14745 break;
14747 case CONST_DOUBLE:
14748 /* We can't handle floating point constants;
14749 TARGET_PRINT_OPERAND must handle them. */
14750 output_operand_lossage ("floating constant misused");
14751 break;
14753 case PLUS:
14754 /* Some assemblers need integer constants to appear first. */
14755 if (CONST_INT_P (XEXP (x, 0)))
14757 output_pic_addr_const (file, XEXP (x, 0), code);
14758 putc ('+', file);
14759 output_pic_addr_const (file, XEXP (x, 1), code);
14761 else
14763 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14764 output_pic_addr_const (file, XEXP (x, 1), code);
14765 putc ('+', file);
14766 output_pic_addr_const (file, XEXP (x, 0), code);
14768 break;
14770 case MINUS:
14771 if (!TARGET_MACHO)
14772 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14773 output_pic_addr_const (file, XEXP (x, 0), code);
14774 putc ('-', file);
14775 output_pic_addr_const (file, XEXP (x, 1), code);
14776 if (!TARGET_MACHO)
14777 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14778 break;
14780 case UNSPEC:
14781 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14783 bool f = i386_asm_output_addr_const_extra (file, x);
14784 gcc_assert (f);
14785 break;
14788 gcc_assert (XVECLEN (x, 0) == 1);
14789 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14790 switch (XINT (x, 1))
14792 case UNSPEC_GOT:
14793 fputs ("@GOT", file);
14794 break;
14795 case UNSPEC_GOTOFF:
14796 fputs ("@GOTOFF", file);
14797 break;
14798 case UNSPEC_PLTOFF:
14799 fputs ("@PLTOFF", file);
14800 break;
14801 case UNSPEC_PCREL:
14802 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14803 "(%rip)" : "[rip]", file);
14804 break;
14805 case UNSPEC_GOTPCREL:
14806 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14807 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14808 break;
14809 case UNSPEC_GOTTPOFF:
14810 /* FIXME: This might be @TPOFF in Sun ld too. */
14811 fputs ("@gottpoff", file);
14812 break;
14813 case UNSPEC_TPOFF:
14814 fputs ("@tpoff", file);
14815 break;
14816 case UNSPEC_NTPOFF:
14817 if (TARGET_64BIT)
14818 fputs ("@tpoff", file);
14819 else
14820 fputs ("@ntpoff", file);
14821 break;
14822 case UNSPEC_DTPOFF:
14823 fputs ("@dtpoff", file);
14824 break;
14825 case UNSPEC_GOTNTPOFF:
14826 if (TARGET_64BIT)
14827 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14828 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14829 else
14830 fputs ("@gotntpoff", file);
14831 break;
14832 case UNSPEC_INDNTPOFF:
14833 fputs ("@indntpoff", file);
14834 break;
14835 #if TARGET_MACHO
14836 case UNSPEC_MACHOPIC_OFFSET:
14837 putc ('-', file);
14838 machopic_output_function_base_name (file);
14839 break;
14840 #endif
14841 default:
14842 output_operand_lossage ("invalid UNSPEC as operand");
14843 break;
14845 break;
14847 default:
14848 output_operand_lossage ("invalid expression as operand");
14852 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14853 We need to emit DTP-relative relocations. */
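/* For SIZE == 8 this emits, e.g., ".long foo@dtpoff, 0" (ASM_LONG being
   the .long directive here), i.e. the DTP-relative offset in the low
   four bytes and a zero upper half. */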
14855 static void ATTRIBUTE_UNUSED
14856 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14858 fputs (ASM_LONG, file);
14859 output_addr_const (file, x);
14860 fputs ("@dtpoff", file);
14861 switch (size)
14863 case 4:
14864 break;
14865 case 8:
14866 fputs (", 0", file);
14867 break;
14868 default:
14869 gcc_unreachable ();
14873 /* Return true if X is a representation of the PIC register. This copes
14874 with calls from ix86_find_base_term, where the register might have
14875 been replaced by a cselib value. */
14877 static bool
14878 ix86_pic_register_p (rtx x)
14880 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14881 return (pic_offset_table_rtx
14882 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14883 else if (!REG_P (x))
14884 return false;
14885 else if (pic_offset_table_rtx)
14887 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14888 return true;
14889 if (HARD_REGISTER_P (x)
14890 && !HARD_REGISTER_P (pic_offset_table_rtx)
14891 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14892 return true;
14893 return false;
14895 else
14896 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14899 /* Helper function for ix86_delegitimize_address.
14900 Attempt to delegitimize TLS local-exec accesses. */
14902 static rtx
14903 ix86_delegitimize_tls_address (rtx orig_x)
14905 rtx x = orig_x, unspec;
14906 struct ix86_address addr;
14908 if (!TARGET_TLS_DIRECT_SEG_REFS)
14909 return orig_x;
14910 if (MEM_P (x))
14911 x = XEXP (x, 0);
14912 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14913 return orig_x;
14914 if (ix86_decompose_address (x, &addr) == 0
14915 || addr.seg != DEFAULT_TLS_SEG_REG
14916 || addr.disp == NULL_RTX
14917 || GET_CODE (addr.disp) != CONST)
14918 return orig_x;
14919 unspec = XEXP (addr.disp, 0);
14920 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14921 unspec = XEXP (unspec, 0);
14922 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14923 return orig_x;
14924 x = XVECEXP (unspec, 0, 0);
14925 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14926 if (unspec != XEXP (addr.disp, 0))
14927 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14928 if (addr.index)
14930 rtx idx = addr.index;
14931 if (addr.scale != 1)
14932 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14933 x = gen_rtx_PLUS (Pmode, idx, x);
14935 if (addr.base)
14936 x = gen_rtx_PLUS (Pmode, addr.base, x);
14937 if (MEM_P (orig_x))
14938 x = replace_equiv_address_nv (orig_x, x);
14939 return x;
14942 /* In the name of slightly smaller debug output, and to cater to
14943 general assembler lossage, recognize PIC+GOTOFF and turn it back
14944 into a direct symbol reference.
14946 On Darwin, this is necessary to avoid a crash, because Darwin
14947 has a different PIC label for each routine but the DWARF debugging
14948 information is not associated with any particular routine, so it's
14949 necessary to remove references to the PIC label from RTL stored by
14950 the DWARF output code. */
14952 static rtx
14953 ix86_delegitimize_address (rtx x)
14955 rtx orig_x = delegitimize_mem_from_attrs (x);
14956 /* addend is NULL or some rtx if x is something+GOTOFF where
14957 something doesn't include the PIC register. */
14958 rtx addend = NULL_RTX;
14959 /* reg_addend is NULL or a multiple of some register. */
14960 rtx reg_addend = NULL_RTX;
14961 /* const_addend is NULL or a const_int. */
14962 rtx const_addend = NULL_RTX;
14963 /* This is the result, or NULL. */
14964 rtx result = NULL_RTX;
14966 x = orig_x;
14968 if (MEM_P (x))
14969 x = XEXP (x, 0);
14971 if (TARGET_64BIT)
14973 if (GET_CODE (x) == CONST
14974 && GET_CODE (XEXP (x, 0)) == PLUS
14975 && GET_MODE (XEXP (x, 0)) == Pmode
14976 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14977 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14978 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14980 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14981 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14982 if (MEM_P (orig_x))
14983 x = replace_equiv_address_nv (orig_x, x);
14984 return x;
14987 if (GET_CODE (x) == CONST
14988 && GET_CODE (XEXP (x, 0)) == UNSPEC
14989 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14990 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14991 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14993 x = XVECEXP (XEXP (x, 0), 0, 0);
14994 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14996 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14997 GET_MODE (x), 0);
14998 if (x == NULL_RTX)
14999 return orig_x;
15001 return x;
15004 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
15005 return ix86_delegitimize_tls_address (orig_x);
15007 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
15008 and -mcmodel=medium -fpic. */
15011 if (GET_CODE (x) != PLUS
15012 || GET_CODE (XEXP (x, 1)) != CONST)
15013 return ix86_delegitimize_tls_address (orig_x);
15015 if (ix86_pic_register_p (XEXP (x, 0)))
15016 /* %ebx + GOT/GOTOFF */
15018 else if (GET_CODE (XEXP (x, 0)) == PLUS)
15020 /* %ebx + %reg * scale + GOT/GOTOFF */
15021 reg_addend = XEXP (x, 0);
15022 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
15023 reg_addend = XEXP (reg_addend, 1);
15024 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
15025 reg_addend = XEXP (reg_addend, 0);
15026 else
15028 reg_addend = NULL_RTX;
15029 addend = XEXP (x, 0);
15032 else
15033 addend = XEXP (x, 0);
15035 x = XEXP (XEXP (x, 1), 0);
15036 if (GET_CODE (x) == PLUS
15037 && CONST_INT_P (XEXP (x, 1)))
15039 const_addend = XEXP (x, 1);
15040 x = XEXP (x, 0);
15043 if (GET_CODE (x) == UNSPEC
15044 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
15045 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
15046 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
15047 && !MEM_P (orig_x) && !addend)))
15048 result = XVECEXP (x, 0, 0);
15050 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
15051 && !MEM_P (orig_x))
15052 result = XVECEXP (x, 0, 0);
15054 if (! result)
15055 return ix86_delegitimize_tls_address (orig_x);
15057 if (const_addend)
15058 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
15059 if (reg_addend)
15060 result = gen_rtx_PLUS (Pmode, reg_addend, result);
15061 if (addend)
15063 /* If the rest of original X doesn't involve the PIC register, add
15064 addend and subtract pic_offset_table_rtx. This can happen e.g.
15065 for code like:
15066 leal (%ebx, %ecx, 4), %ecx
15068 movl foo@GOTOFF(%ecx), %edx
15069 in which case we return (%ecx - %ebx) + foo
15070 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
15071 and reload has completed. */
15072 if (pic_offset_table_rtx
15073 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
15074 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
15075 pic_offset_table_rtx),
15076 result);
15077 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
15079 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
15080 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
15081 result = gen_rtx_PLUS (Pmode, tmp, result);
15083 else
15084 return orig_x;
15086 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
15088 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
15089 if (result == NULL_RTX)
15090 return orig_x;
15092 return result;
15095 /* If X is a machine specific address (i.e. a symbol or label being
15096 referenced as a displacement from the GOT implemented using an
15097 UNSPEC), then return the base term. Otherwise return X. */
15100 ix86_find_base_term (rtx x)
15102 rtx term;
15104 if (TARGET_64BIT)
15106 if (GET_CODE (x) != CONST)
15107 return x;
15108 term = XEXP (x, 0);
15109 if (GET_CODE (term) == PLUS
15110 && CONST_INT_P (XEXP (term, 1)))
15111 term = XEXP (term, 0);
15112 if (GET_CODE (term) != UNSPEC
15113 || (XINT (term, 1) != UNSPEC_GOTPCREL
15114 && XINT (term, 1) != UNSPEC_PCREL))
15115 return x;
15117 return XVECEXP (term, 0, 0);
15120 return ix86_delegitimize_address (x);
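/* Print to FILE the condition-code suffix for comparison CODE in mode
   MODE. If REVERSE is true, print the suffix for the reversed
   condition. FP is true when the suffix is for an fcmov-style
   floating-point instruction, where some suffixes differ (e.g. "nbe"
   instead of "a"). */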
15123 static void
15124 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15125 bool fp, FILE *file)
15127 const char *suffix;
15129 if (mode == CCFPmode || mode == CCFPUmode)
15131 code = ix86_fp_compare_code_to_integer (code);
15132 mode = CCmode;
15134 if (reverse)
15135 code = reverse_condition (code);
15137 switch (code)
15139 case EQ:
15140 switch (mode)
15142 case CCAmode:
15143 suffix = "a";
15144 break;
15145 case CCCmode:
15146 suffix = "c";
15147 break;
15148 case CCOmode:
15149 suffix = "o";
15150 break;
15151 case CCPmode:
15152 suffix = "p";
15153 break;
15154 case CCSmode:
15155 suffix = "s";
15156 break;
15157 default:
15158 suffix = "e";
15159 break;
15161 break;
15162 case NE:
15163 switch (mode)
15165 case CCAmode:
15166 suffix = "na";
15167 break;
15168 case CCCmode:
15169 suffix = "nc";
15170 break;
15171 case CCOmode:
15172 suffix = "no";
15173 break;
15174 case CCPmode:
15175 suffix = "np";
15176 break;
15177 case CCSmode:
15178 suffix = "ns";
15179 break;
15180 default:
15181 suffix = "ne";
15182 break;
15184 break;
15185 case GT:
15186 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15187 suffix = "g";
15188 break;
15189 case GTU:
15190 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15191 Those same assemblers have the same but opposite lossage on cmov. */
15192 if (mode == CCmode)
15193 suffix = fp ? "nbe" : "a";
15194 else
15195 gcc_unreachable ();
15196 break;
15197 case LT:
15198 switch (mode)
15200 case CCNOmode:
15201 case CCGOCmode:
15202 suffix = "s";
15203 break;
15205 case CCmode:
15206 case CCGCmode:
15207 suffix = "l";
15208 break;
15210 default:
15211 gcc_unreachable ();
15213 break;
15214 case LTU:
15215 if (mode == CCmode)
15216 suffix = "b";
15217 else if (mode == CCCmode)
15218 suffix = fp ? "b" : "c";
15219 else
15220 gcc_unreachable ();
15221 break;
15222 case GE:
15223 switch (mode)
15225 case CCNOmode:
15226 case CCGOCmode:
15227 suffix = "ns";
15228 break;
15230 case CCmode:
15231 case CCGCmode:
15232 suffix = "ge";
15233 break;
15235 default:
15236 gcc_unreachable ();
15238 break;
15239 case GEU:
15240 if (mode == CCmode)
15241 suffix = "nb";
15242 else if (mode == CCCmode)
15243 suffix = fp ? "nb" : "nc";
15244 else
15245 gcc_unreachable ();
15246 break;
15247 case LE:
15248 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15249 suffix = "le";
15250 break;
15251 case LEU:
15252 if (mode == CCmode)
15253 suffix = "be";
15254 else
15255 gcc_unreachable ();
15256 break;
15257 case UNORDERED:
15258 suffix = fp ? "u" : "p";
15259 break;
15260 case ORDERED:
15261 suffix = fp ? "nu" : "np";
15262 break;
15263 default:
15264 gcc_unreachable ();
15266 fputs (suffix, file);
15269 /* Print the name of register X to FILE based on its machine mode and number.
15270 If CODE is 'w', pretend the mode is HImode.
15271 If CODE is 'b', pretend the mode is QImode.
15272 If CODE is 'k', pretend the mode is SImode.
15273 If CODE is 'q', pretend the mode is DImode.
15274 If CODE is 'x', pretend the mode is V4SFmode.
15275 If CODE is 't', pretend the mode is V8SFmode.
15276 If CODE is 'g', pretend the mode is V16SFmode.
15277 If CODE is 'h', pretend the reg is the 'high' byte register.
15278 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15279 If CODE is 'd', duplicate the operand for AVX instruction.
15282 void
15283 print_reg (rtx x, int code, FILE *file)
15285 const char *reg;
15286 int msize;
15287 unsigned int regno;
15288 bool duplicated;
15290 if (ASSEMBLER_DIALECT == ASM_ATT)
15291 putc ('%', file);
15293 if (x == pc_rtx)
15295 gcc_assert (TARGET_64BIT);
15296 fputs ("rip", file);
15297 return;
15300 if (code == 'y' && STACK_TOP_P (x))
15302 fputs ("st(0)", file);
15303 return;
15306 if (code == 'w')
15307 msize = 2;
15308 else if (code == 'b')
15309 msize = 1;
15310 else if (code == 'k')
15311 msize = 4;
15312 else if (code == 'q')
15313 msize = 8;
15314 else if (code == 'h')
15315 msize = 0;
15316 else if (code == 'x')
15317 msize = 16;
15318 else if (code == 't')
15319 msize = 32;
15320 else if (code == 'g')
15321 msize = 64;
15322 else
15323 msize = GET_MODE_SIZE (GET_MODE (x));
15325 regno = true_regnum (x);
15327 gcc_assert (regno != ARG_POINTER_REGNUM
15328 && regno != FRAME_POINTER_REGNUM
15329 && regno != FLAGS_REG
15330 && regno != FPSR_REG
15331 && regno != FPCR_REG);
15333 duplicated = code == 'd' && TARGET_AVX;
15335 switch (msize)
15337 case 8:
15338 case 4:
15339 if (LEGACY_INT_REGNO_P (regno))
15340 putc (msize == 8 && TARGET_64BIT ? 'r' : 'e', file);
15341 case 16:
15342 case 12:
15343 case 2:
15344 normal:
15345 reg = hi_reg_name[regno];
15346 break;
15347 case 1:
15348 if (regno >= ARRAY_SIZE (qi_reg_name))
15349 goto normal;
15350 reg = qi_reg_name[regno];
15351 break;
15352 case 0:
15353 if (regno >= ARRAY_SIZE (qi_high_reg_name))
15354 goto normal;
15355 reg = qi_high_reg_name[regno];
15356 break;
15357 case 32:
15358 case 64:
15359 if (SSE_REGNO_P (regno))
15361 gcc_assert (!duplicated);
15362 putc (msize == 32 ? 'y' : 'z', file);
15363 reg = hi_reg_name[regno] + 1;
15364 break;
15366 goto normal;
15367 default:
15368 gcc_unreachable ();
15371 fputs (reg, file);
15373 /* Irritatingly, AMD extended registers use a
15374 different naming convention: "r%d[bwd]". */
15375 if (REX_INT_REGNO_P (regno))
15377 gcc_assert (TARGET_64BIT);
15378 switch (msize)
15380 case 0:
15381 error ("extended registers have no high halves");
15382 break;
15383 case 1:
15384 putc ('b', file);
15385 break;
15386 case 2:
15387 putc ('w', file);
15388 break;
15389 case 4:
15390 putc ('d', file);
15391 break;
15392 case 8:
15393 /* no suffix */
15394 break;
15395 default:
15396 error ("unsupported operand size for extended register");
15397 break;
15399 return;
15402 if (duplicated)
15404 if (ASSEMBLER_DIALECT == ASM_ATT)
15405 fprintf (file, ", %%%s", reg);
15406 else
15407 fprintf (file, ", %s", reg);
15411 /* Meaning of CODE:
15412 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15413 C -- print opcode suffix for set/cmov insn.
15414 c -- like C, but print reversed condition
15415 F,f -- likewise, but for floating-point.
15416 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15417 otherwise nothing
15418 R -- print embedded rounding and sae.
15419 r -- print only sae.
15420 z -- print the opcode suffix for the size of the current operand.
15421 Z -- likewise, with special suffixes for x87 instructions.
15422 * -- print a star (in certain assembler syntax)
15423 A -- print an absolute memory reference.
15424 E -- print address with DImode register names if TARGET_64BIT.
15425 w -- print the operand as if it's a "word" (HImode) even if it isn't.
15426 s -- print a shift double count, followed by the assembler's argument
15427 delimiter.
15428 b -- print the QImode name of the register for the indicated operand.
15429 %b0 would print %al if operands[0] is reg 0.
15430 w -- likewise, print the HImode name of the register.
15431 k -- likewise, print the SImode name of the register.
15432 q -- likewise, print the DImode name of the register.
15433 x -- likewise, print the V4SFmode name of the register.
15434 t -- likewise, print the V8SFmode name of the register.
15435 g -- likewise, print the V16SFmode name of the register.
15436 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15437 y -- print "st(0)" instead of "st" as a register.
15438 d -- print duplicated register operand for AVX instruction.
15439 D -- print condition for SSE cmp instruction.
15440 P -- if PIC, print an @PLT suffix.
15441 p -- print raw symbol name.
15442 X -- don't print any sort of PIC '@' suffix for a symbol.
15443 & -- print some in-use local-dynamic symbol name.
15444 H -- print a memory address offset by 8; used for sse high-parts
15445 Y -- print condition for XOP pcom* instruction.
15446 + -- print a branch hint as 'cs' or 'ds' prefix
15447 ; -- print a semicolon (after prefixes, due to a bug in older gas).
15448 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15449 @ -- print a segment register of thread base pointer load
15450 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15451 ! -- print MPX prefix for jxx/call/ret instructions if required.
15454 void
15455 ix86_print_operand (FILE *file, rtx x, int code)
15457 if (code)
15459 switch (code)
15461 case 'A':
15462 switch (ASSEMBLER_DIALECT)
15464 case ASM_ATT:
15465 putc ('*', file);
15466 break;
15468 case ASM_INTEL:
15469 /* Intel syntax. For absolute addresses, registers should not
15470 be surrounded by brackets. */
15471 if (!REG_P (x))
15473 putc ('[', file);
15474 ix86_print_operand (file, x, 0);
15475 putc (']', file);
15476 return;
15478 break;
15480 default:
15481 gcc_unreachable ();
15484 ix86_print_operand (file, x, 0);
15485 return;
15487 case 'E':
15488 /* Wrap address in an UNSPEC to declare special handling. */
15489 if (TARGET_64BIT)
15490 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15492 output_address (x);
15493 return;
15495 case 'L':
15496 if (ASSEMBLER_DIALECT == ASM_ATT)
15497 putc ('l', file);
15498 return;
15500 case 'W':
15501 if (ASSEMBLER_DIALECT == ASM_ATT)
15502 putc ('w', file);
15503 return;
15505 case 'B':
15506 if (ASSEMBLER_DIALECT == ASM_ATT)
15507 putc ('b', file);
15508 return;
15510 case 'Q':
15511 if (ASSEMBLER_DIALECT == ASM_ATT)
15512 putc ('l', file);
15513 return;
15515 case 'S':
15516 if (ASSEMBLER_DIALECT == ASM_ATT)
15517 putc ('s', file);
15518 return;
15520 case 'T':
15521 if (ASSEMBLER_DIALECT == ASM_ATT)
15522 putc ('t', file);
15523 return;
15525 case 'O':
15526 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15527 if (ASSEMBLER_DIALECT != ASM_ATT)
15528 return;
15530 switch (GET_MODE_SIZE (GET_MODE (x)))
15532 case 2:
15533 putc ('w', file);
15534 break;
15536 case 4:
15537 putc ('l', file);
15538 break;
15540 case 8:
15541 putc ('q', file);
15542 break;
15544 default:
15545 output_operand_lossage
15546 ("invalid operand size for operand code 'O'");
15547 return;
15550 putc ('.', file);
15551 #endif
15552 return;
15554 case 'z':
15555 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15557 /* Opcodes don't get size suffixes when using the Intel dialect. */
15558 if (ASSEMBLER_DIALECT == ASM_INTEL)
15559 return;
15561 switch (GET_MODE_SIZE (GET_MODE (x)))
15563 case 1:
15564 putc ('b', file);
15565 return;
15567 case 2:
15568 putc ('w', file);
15569 return;
15571 case 4:
15572 putc ('l', file);
15573 return;
15575 case 8:
15576 putc ('q', file);
15577 return;
15579 default:
15580 output_operand_lossage
15581 ("invalid operand size for operand code 'z'");
15582 return;
15586 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15587 warning
15588 (0, "non-integer operand used with operand code 'z'");
15589 /* FALLTHRU */
15591 case 'Z':
15592 /* 387 opcodes don't get size suffixes when using the Intel dialect. */
15593 if (ASSEMBLER_DIALECT == ASM_INTEL)
15594 return;
15596 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15598 switch (GET_MODE_SIZE (GET_MODE (x)))
15600 case 2:
15601 #ifdef HAVE_AS_IX86_FILDS
15602 putc ('s', file);
15603 #endif
15604 return;
15606 case 4:
15607 putc ('l', file);
15608 return;
15610 case 8:
15611 #ifdef HAVE_AS_IX86_FILDQ
15612 putc ('q', file);
15613 #else
15614 fputs ("ll", file);
15615 #endif
15616 return;
15618 default:
15619 break;
15622 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15624 /* 387 opcodes don't get size suffixes
15625 if the operands are registers. */
15626 if (STACK_REG_P (x))
15627 return;
15629 switch (GET_MODE_SIZE (GET_MODE (x)))
15631 case 4:
15632 putc ('s', file);
15633 return;
15635 case 8:
15636 putc ('l', file);
15637 return;
15639 case 12:
15640 case 16:
15641 putc ('t', file);
15642 return;
15644 default:
15645 break;
15648 else
15650 output_operand_lossage
15651 ("invalid operand type used with operand code 'Z'");
15652 return;
15655 output_operand_lossage
15656 ("invalid operand size for operand code 'Z'");
15657 return;
15659 case 'd':
15660 case 'b':
15661 case 'w':
15662 case 'k':
15663 case 'q':
15664 case 'h':
15665 case 't':
15666 case 'g':
15667 case 'y':
15668 case 'x':
15669 case 'X':
15670 case 'P':
15671 case 'p':
15672 break;
15674 case 's':
15675 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15677 ix86_print_operand (file, x, 0);
15678 fputs (", ", file);
15680 return;
15682 case 'Y':
15683 switch (GET_CODE (x))
15685 case NE:
15686 fputs ("neq", file);
15687 break;
15688 case EQ:
15689 fputs ("eq", file);
15690 break;
15691 case GE:
15692 case GEU:
15693 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15694 break;
15695 case GT:
15696 case GTU:
15697 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15698 break;
15699 case LE:
15700 case LEU:
15701 fputs ("le", file);
15702 break;
15703 case LT:
15704 case LTU:
15705 fputs ("lt", file);
15706 break;
15707 case UNORDERED:
15708 fputs ("unord", file);
15709 break;
15710 case ORDERED:
15711 fputs ("ord", file);
15712 break;
15713 case UNEQ:
15714 fputs ("ueq", file);
15715 break;
15716 case UNGE:
15717 fputs ("nlt", file);
15718 break;
15719 case UNGT:
15720 fputs ("nle", file);
15721 break;
15722 case UNLE:
15723 fputs ("ule", file);
15724 break;
15725 case UNLT:
15726 fputs ("ult", file);
15727 break;
15728 case LTGT:
15729 fputs ("une", file);
15730 break;
15731 default:
15732 output_operand_lossage ("operand is not a condition code, "
15733 "invalid operand code 'Y'");
15734 return;
15736 return;
15738 case 'D':
15739 /* A little bit of braindamage here. The SSE compare instructions
15740 use completely different names for the comparisons than the
15741 fp conditional moves do. */
15742 switch (GET_CODE (x))
15744 case UNEQ:
15745 if (TARGET_AVX)
15747 fputs ("eq_us", file);
15748 break;
15750 case EQ:
15751 fputs ("eq", file);
15752 break;
15753 case UNLT:
15754 if (TARGET_AVX)
15756 fputs ("nge", file);
15757 break;
15759 case LT:
15760 fputs ("lt", file);
15761 break;
15762 case UNLE:
15763 if (TARGET_AVX)
15765 fputs ("ngt", file);
15766 break;
15768 case LE:
15769 fputs ("le", file);
15770 break;
15771 case UNORDERED:
15772 fputs ("unord", file);
15773 break;
15774 case LTGT:
15775 if (TARGET_AVX)
15777 fputs ("neq_oq", file);
15778 break;
15780 case NE:
15781 fputs ("neq", file);
15782 break;
15783 case GE:
15784 if (TARGET_AVX)
15786 fputs ("ge", file);
15787 break;
15789 case UNGE:
15790 fputs ("nlt", file);
15791 break;
15792 case GT:
15793 if (TARGET_AVX)
15795 fputs ("gt", file);
15796 break;
15798 case UNGT:
15799 fputs ("nle", file);
15800 break;
15801 case ORDERED:
15802 fputs ("ord", file);
15803 break;
15804 default:
15805 output_operand_lossage ("operand is not a condition code, "
15806 "invalid operand code 'D'");
15807 return;
15809 return;
15811 case 'F':
15812 case 'f':
15813 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15814 if (ASSEMBLER_DIALECT == ASM_ATT)
15815 putc ('.', file);
15816 #endif
15818 case 'C':
15819 case 'c':
15820 if (!COMPARISON_P (x))
15822 output_operand_lossage ("operand is not a condition code, "
15823 "invalid operand code '%c'", code);
15824 return;
15826 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15827 code == 'c' || code == 'f',
15828 code == 'F' || code == 'f',
15829 file);
15830 return;
15832 case 'H':
15833 if (!offsettable_memref_p (x))
15835 output_operand_lossage ("operand is not an offsettable memory "
15836 "reference, invalid operand code 'H'");
15837 return;
15839 /* It doesn't actually matter what mode we use here, as we're
15840 only going to use this for printing. */
15841 x = adjust_address_nv (x, DImode, 8);
15842 /* Output 'qword ptr' for intel assembler dialect. */
15843 if (ASSEMBLER_DIALECT == ASM_INTEL)
15844 code = 'q';
15845 break;
15847 case 'K':
15848 gcc_assert (CONST_INT_P (x));
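/* 0xf2 and 0xf3 below are the raw XACQUIRE and XRELEASE prefix bytes,
   emitted directly when the assembler does not know the HLE
   mnemonics. */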
15850 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15851 #ifdef HAVE_AS_IX86_HLE
15852 fputs ("xacquire ", file);
15853 #else
15854 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15855 #endif
15856 else if (INTVAL (x) & IX86_HLE_RELEASE)
15857 #ifdef HAVE_AS_IX86_HLE
15858 fputs ("xrelease ", file);
15859 #else
15860 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15861 #endif
15862 /* We do not want to print the value of the operand. */
15863 return;
15865 case 'N':
15866 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15867 fputs ("{z}", file);
15868 return;
15870 case 'r':
15871 gcc_assert (CONST_INT_P (x));
15872 gcc_assert (INTVAL (x) == ROUND_SAE);
15874 if (ASSEMBLER_DIALECT == ASM_INTEL)
15875 fputs (", ", file);
15877 fputs ("{sae}", file);
15879 if (ASSEMBLER_DIALECT == ASM_ATT)
15880 fputs (", ", file);
15882 return;
15884 case 'R':
15885 gcc_assert (CONST_INT_P (x));
15887 if (ASSEMBLER_DIALECT == ASM_INTEL)
15888 fputs (", ", file);
15890 switch (INTVAL (x))
15892 case ROUND_NEAREST_INT | ROUND_SAE:
15893 fputs ("{rn-sae}", file);
15894 break;
15895 case ROUND_NEG_INF | ROUND_SAE:
15896 fputs ("{rd-sae}", file);
15897 break;
15898 case ROUND_POS_INF | ROUND_SAE:
15899 fputs ("{ru-sae}", file);
15900 break;
15901 case ROUND_ZERO | ROUND_SAE:
15902 fputs ("{rz-sae}", file);
15903 break;
15904 default:
15905 gcc_unreachable ();
15908 if (ASSEMBLER_DIALECT == ASM_ATT)
15909 fputs (", ", file);
15911 return;
15913 case '*':
15914 if (ASSEMBLER_DIALECT == ASM_ATT)
15915 putc ('*', file);
15916 return;
15918 case '&':
15920 const char *name = get_some_local_dynamic_name ();
15921 if (name == NULL)
15922 output_operand_lossage ("'%%&' used without any "
15923 "local dynamic TLS references");
15924 else
15925 assemble_name (file, name);
15926 return;
15929 case '+':
15931 rtx x;
15933 if (!optimize
15934 || optimize_function_for_size_p (cfun)
15935 || !TARGET_BRANCH_PREDICTION_HINTS)
15936 return;
15938 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15939 if (x)
15941 int pred_val = XINT (x, 0);
15943 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15944 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15946 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15947 bool cputaken
15948 = final_forward_branch_p (current_output_insn) == 0;
15950 /* Emit hints only in cases where the default branch prediction
15951 heuristics would fail. */
15952 if (taken != cputaken)
15954 /* We use 3e (DS) prefix for taken branches and
15955 2e (CS) prefix for not taken branches. */
15956 if (taken)
15957 fputs ("ds ; ", file);
15958 else
15959 fputs ("cs ; ", file);
15963 return;
15966 case ';':
15967 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15968 putc (';', file);
15969 #endif
15970 return;
15972 case '@':
15973 if (ASSEMBLER_DIALECT == ASM_ATT)
15974 putc ('%', file);
15976 /* The kernel uses a different segment register for performance
15977 reasons; a system call would not have to trash the userspace
15978 segment register, which would be expensive. */
15979 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15980 fputs ("fs", file);
15981 else
15982 fputs ("gs", file);
15983 return;
15985 case '~':
15986 putc (TARGET_AVX2 ? 'i' : 'f', file);
15987 return;
15989 case '^':
15990 if (TARGET_64BIT && Pmode != word_mode)
15991 fputs ("addr32 ", file);
15992 return;
15994 case '!':
15995 if (ix86_bnd_prefixed_insn_p (current_output_insn))
15996 fputs ("bnd ", file);
15997 return;
15999 default:
16000 output_operand_lossage ("invalid operand code '%c'", code);
16004 if (REG_P (x))
16005 print_reg (x, code, file);
16007 else if (MEM_P (x))
16009 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
16010 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
16011 && GET_MODE (x) != BLKmode)
16013 const char * size;
16014 switch (GET_MODE_SIZE (GET_MODE (x)))
16016 case 1: size = "BYTE"; break;
16017 case 2: size = "WORD"; break;
16018 case 4: size = "DWORD"; break;
16019 case 8: size = "QWORD"; break;
16020 case 12: size = "TBYTE"; break;
16021 case 16:
16022 if (GET_MODE (x) == XFmode)
16023 size = "TBYTE";
16024 else
16025 size = "XMMWORD";
16026 break;
16027 case 32: size = "YMMWORD"; break;
16028 case 64: size = "ZMMWORD"; break;
16029 default:
16030 gcc_unreachable ();
16033 /* Check for explicit size override (codes 'b', 'w', 'k',
16034 'q' and 'x') */
16035 if (code == 'b')
16036 size = "BYTE";
16037 else if (code == 'w')
16038 size = "WORD";
16039 else if (code == 'k')
16040 size = "DWORD";
16041 else if (code == 'q')
16042 size = "QWORD";
16043 else if (code == 'x')
16044 size = "XMMWORD";
16046 fputs (size, file);
16047 fputs (" PTR ", file);
16050 x = XEXP (x, 0);
16051 /* Avoid (%rip) for call operands. */
16052 if (CONSTANT_ADDRESS_P (x) && code == 'P'
16053 && !CONST_INT_P (x))
16054 output_addr_const (file, x);
16055 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
16056 output_operand_lossage ("invalid constraints for operand");
16057 else
16058 output_address (x);
16061 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
16063 REAL_VALUE_TYPE r;
16064 long l;
16066 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16067 REAL_VALUE_TO_TARGET_SINGLE (r, l);
16069 if (ASSEMBLER_DIALECT == ASM_ATT)
16070 putc ('$', file);
16071 /* Sign extend 32bit SFmode immediate to 8 bytes. */
16072 if (code == 'q')
16073 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
16074 (unsigned long long) (int) l);
16075 else
16076 fprintf (file, "0x%08x", (unsigned int) l);
16079 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
16081 REAL_VALUE_TYPE r;
16082 long l[2];
16084 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16085 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16087 if (ASSEMBLER_DIALECT == ASM_ATT)
16088 putc ('$', file);
16089 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16092 /* These float cases don't actually occur as immediate operands. */
16093 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
16095 char dstr[30];
16097 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16098 fputs (dstr, file);
16101 else
16103 /* We have patterns that allow zero sets of memory, for instance.
16104 In 64-bit mode, we should probably support all 8-byte vectors,
16105 since we can in fact encode that into an immediate. */
16106 if (GET_CODE (x) == CONST_VECTOR)
16108 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16109 x = const0_rtx;
16112 if (code != 'P' && code != 'p')
16114 if (CONST_INT_P (x))
16116 if (ASSEMBLER_DIALECT == ASM_ATT)
16117 putc ('$', file);
16119 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16120 || GET_CODE (x) == LABEL_REF)
16122 if (ASSEMBLER_DIALECT == ASM_ATT)
16123 putc ('$', file);
16124 else
16125 fputs ("OFFSET FLAT:", file);
16128 if (CONST_INT_P (x))
16129 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16130 else if (flag_pic || MACHOPIC_INDIRECT)
16131 output_pic_addr_const (file, x, code);
16132 else
16133 output_addr_const (file, x);
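/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P: return true for the
   punctuation characters that ix86_print_operand handles without an
   operand. */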
16137 static bool
16138 ix86_print_operand_punct_valid_p (unsigned char code)
16140 return (code == '@' || code == '*' || code == '+' || code == '&'
16141 || code == ';' || code == '~' || code == '^' || code == '!');
16144 /* Print a memory operand whose address is ADDR. */
16146 static void
16147 ix86_print_operand_address (FILE *file, rtx addr)
16149 struct ix86_address parts;
16150 rtx base, index, disp;
16151 int scale;
16152 int ok;
16153 bool vsib = false;
16154 int code = 0;
16156 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16158 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16159 gcc_assert (parts.index == NULL_RTX);
16160 parts.index = XVECEXP (addr, 0, 1);
16161 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16162 addr = XVECEXP (addr, 0, 0);
16163 vsib = true;
16165 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16167 gcc_assert (TARGET_64BIT);
16168 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16169 code = 'q';
16171 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16173 ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16174 gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16175 if (parts.base != NULL_RTX)
16177 parts.index = parts.base;
16178 parts.scale = 1;
16180 parts.base = XVECEXP (addr, 0, 0);
16181 addr = XVECEXP (addr, 0, 0);
16183 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16185 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16186 gcc_assert (parts.index == NULL_RTX);
16187 parts.index = XVECEXP (addr, 0, 1);
16188 addr = XVECEXP (addr, 0, 0);
16190 else
16191 ok = ix86_decompose_address (addr, &parts);
16193 gcc_assert (ok);
16195 base = parts.base;
16196 index = parts.index;
16197 disp = parts.disp;
16198 scale = parts.scale;
16200 switch (parts.seg)
16202 case SEG_DEFAULT:
16203 break;
16204 case SEG_FS:
16205 case SEG_GS:
16206 if (ASSEMBLER_DIALECT == ASM_ATT)
16207 putc ('%', file);
16208 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16209 break;
16210 default:
16211 gcc_unreachable ();
16214 /* Use one byte shorter RIP relative addressing for 64bit mode. */
16215 if (TARGET_64BIT && !base && !index)
16217 rtx symbol = disp;
16219 if (GET_CODE (disp) == CONST
16220 && GET_CODE (XEXP (disp, 0)) == PLUS
16221 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16222 symbol = XEXP (XEXP (disp, 0), 0);
16224 if (GET_CODE (symbol) == LABEL_REF
16225 || (GET_CODE (symbol) == SYMBOL_REF
16226 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16227 base = pc_rtx;
16229 if (!base && !index)
16231 /* A displacement-only address requires special attention. */
16233 if (CONST_INT_P (disp))
16235 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16236 fputs ("ds:", file);
16237 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16239 else if (flag_pic)
16240 output_pic_addr_const (file, disp, 0);
16241 else
16242 output_addr_const (file, disp);
16244 else
16246 /* Print SImode register names to force addr32 prefix. */
16247 if (SImode_address_operand (addr, VOIDmode))
16249 #ifdef ENABLE_CHECKING
16250 gcc_assert (TARGET_64BIT);
16251 switch (GET_CODE (addr))
16253 case SUBREG:
16254 gcc_assert (GET_MODE (addr) == SImode);
16255 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16256 break;
16257 case ZERO_EXTEND:
16258 case AND:
16259 gcc_assert (GET_MODE (addr) == DImode);
16260 break;
16261 default:
16262 gcc_unreachable ();
16264 #endif
16265 gcc_assert (!code);
16266 code = 'k';
16268 else if (code == 0
16269 && TARGET_X32
16270 && disp
16271 && CONST_INT_P (disp)
16272 && INTVAL (disp) < -16*1024*1024)
16274 /* X32 runs in 64-bit mode, where displacement, DISP, in
16275 address DISP(%r64), is encoded as 32-bit immediate sign-
16276 extended from 32-bit to 64-bit. For -0x40000300(%r64),
16277 address is %r64 + 0xffffffffbffffd00. When %r64 <
16278 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16279 which is invalid for x32. The correct address is %r64
16280 - 0x40000300 == 0xf7ffdd64. To properly encode
16281 -0x40000300(%r64) for x32, we zero-extend negative
16282 displacement by forcing addr32 prefix which truncates
16283 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
16284 zero-extend all negative displacements, including -1(%rsp).
16285 However, for small negative displacements, sign-extension
16286 won't cause overflow. We only zero-extend negative
16287 displacements if they < -16*1024*1024, which is also used
16288 to check legitimate address displacements for PIC. */
16289 code = 'k';
16292 if (ASSEMBLER_DIALECT == ASM_ATT)
16294 if (disp)
16296 if (flag_pic)
16297 output_pic_addr_const (file, disp, 0);
16298 else if (GET_CODE (disp) == LABEL_REF)
16299 output_asm_label (disp);
16300 else
16301 output_addr_const (file, disp);
16304 putc ('(', file);
16305 if (base)
16306 print_reg (base, code, file);
16307 if (index)
16309 putc (',', file);
16310 print_reg (index, vsib ? 0 : code, file);
16311 if (scale != 1 || vsib)
16312 fprintf (file, ",%d", scale);
16314 putc (')', file);
16316 else
16318 rtx offset = NULL_RTX;
16320 if (disp)
16322 /* Pull out the offset of a symbol; print any symbol itself. */
16323 if (GET_CODE (disp) == CONST
16324 && GET_CODE (XEXP (disp, 0)) == PLUS
16325 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16327 offset = XEXP (XEXP (disp, 0), 1);
16328 disp = gen_rtx_CONST (VOIDmode,
16329 XEXP (XEXP (disp, 0), 0));
16332 if (flag_pic)
16333 output_pic_addr_const (file, disp, 0);
16334 else if (GET_CODE (disp) == LABEL_REF)
16335 output_asm_label (disp);
16336 else if (CONST_INT_P (disp))
16337 offset = disp;
16338 else
16339 output_addr_const (file, disp);
16342 putc ('[', file);
16343 if (base)
16345 print_reg (base, code, file);
16346 if (offset)
16348 if (INTVAL (offset) >= 0)
16349 putc ('+', file);
16350 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16353 else if (offset)
16354 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16355 else
16356 putc ('0', file);
16358 if (index)
16360 putc ('+', file);
16361 print_reg (index, vsib ? 0 : code, file);
16362 if (scale != 1 || vsib)
16363 fprintf (file, "*%d", scale);
16365 putc (']', file);
16370 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
16372 static bool
16373 i386_asm_output_addr_const_extra (FILE *file, rtx x)
16375 rtx op;
16377 if (GET_CODE (x) != UNSPEC)
16378 return false;
16380 op = XVECEXP (x, 0, 0);
16381 switch (XINT (x, 1))
16383 case UNSPEC_GOTTPOFF:
16384 output_addr_const (file, op);
16385 /* FIXME: This might be @TPOFF in Sun ld. */
16386 fputs ("@gottpoff", file);
16387 break;
16388 case UNSPEC_TPOFF:
16389 output_addr_const (file, op);
16390 fputs ("@tpoff", file);
16391 break;
16392 case UNSPEC_NTPOFF:
16393 output_addr_const (file, op);
16394 if (TARGET_64BIT)
16395 fputs ("@tpoff", file);
16396 else
16397 fputs ("@ntpoff", file);
16398 break;
16399 case UNSPEC_DTPOFF:
16400 output_addr_const (file, op);
16401 fputs ("@dtpoff", file);
16402 break;
16403 case UNSPEC_GOTNTPOFF:
16404 output_addr_const (file, op);
16405 if (TARGET_64BIT)
16406 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16407 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16408 else
16409 fputs ("@gotntpoff", file);
16410 break;
16411 case UNSPEC_INDNTPOFF:
16412 output_addr_const (file, op);
16413 fputs ("@indntpoff", file);
16414 break;
16415 #if TARGET_MACHO
16416 case UNSPEC_MACHOPIC_OFFSET:
16417 output_addr_const (file, op);
16418 putc ('-', file);
16419 machopic_output_function_base_name (file);
16420 break;
16421 #endif
16423 case UNSPEC_STACK_CHECK:
16425 int offset;
16427 gcc_assert (flag_split_stack);
16429 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16430 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16431 #else
16432 gcc_unreachable ();
16433 #endif
16435 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16437 break;
16439 default:
16440 return false;
16443 return true;
16446 /* Split one or more double-mode RTL references into pairs of half-mode
16447 references. The RTL can be REG, offsettable MEM, integer constant, or
16448 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
16449 split and "num" is its length. lo_half and hi_half are output arrays
16450 that parallel "operands". */
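 /* For illustration, splitting a single DImode pseudo, say (reg:DI 100):

      rtx ops[1] = { dimode_reg }, lo[1], hi[1];
      split_double_mode (DImode, ops, 1, lo, hi);

    leaves lo[0] as (subreg:SI (reg:DI 100) 0) and hi[0] as
    (subreg:SI (reg:DI 100) 4); the high word lives at byte offset
    GET_MODE_SIZE (SImode) == 4.  */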
16452 void
16453 split_double_mode (machine_mode mode, rtx operands[],
16454 int num, rtx lo_half[], rtx hi_half[])
16456 machine_mode half_mode;
16457 unsigned int byte;
16459 switch (mode)
16461 case TImode:
16462 half_mode = DImode;
16463 break;
16464 case DImode:
16465 half_mode = SImode;
16466 break;
16467 default:
16468 gcc_unreachable ();
16471 byte = GET_MODE_SIZE (half_mode);
16473 while (num--)
16475 rtx op = operands[num];
16477 /* simplify_subreg refuses to split volatile memory addresses,
16478 but we still have to handle them. */
16479 if (MEM_P (op))
16481 lo_half[num] = adjust_address (op, half_mode, 0);
16482 hi_half[num] = adjust_address (op, half_mode, byte);
16484 else
16486 lo_half[num] = simplify_gen_subreg (half_mode, op,
16487 GET_MODE (op) == VOIDmode
16488 ? mode : GET_MODE (op), 0);
16489 hi_half[num] = simplify_gen_subreg (half_mode, op,
16490 GET_MODE (op) == VOIDmode
16491 ? mode : GET_MODE (op), byte);
16496 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
16497 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
16498 is the expression of the binary operation. The output may either be
16499 emitted here, or returned to the caller, like all output_* functions.
16501 There is no guarantee that the operands are the same mode, as they
16502 might be within FLOAT or FLOAT_EXTEND expressions. */
16504 #ifndef SYSV386_COMPAT
16505 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
16506 wants to fix the assemblers because that causes incompatibility
16507 with gcc. No-one wants to fix gcc because that causes
16508 incompatibility with assemblers... You can use the option of
16509 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
16510 #define SYSV386_COMPAT 1
16511 #endif
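 /* For the SSE/AVX path below the returned template is built from SSEP,
    e.g. for PLUS in SFmode:
      with AVX:     "vaddss\t{%2, %1, %0|%0, %1, %2}"
      without AVX:  "addss\t{%2, %0|%0, %2}"
    (the non-AVX form skips the leading 'v' by starting from ssep + 1).  */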
16513 const char *
16514 output_387_binary_op (rtx insn, rtx *operands)
16516 static char buf[40];
16517 const char *p;
16518 const char *ssep;
16519 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16521 #ifdef ENABLE_CHECKING
16522 /* Even if we do not want to check the inputs, this documents the input
16523 constraints, which helps in understanding the following code. */
16524 if (STACK_REG_P (operands[0])
16525 && ((REG_P (operands[1])
16526 && REGNO (operands[0]) == REGNO (operands[1])
16527 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16528 || (REG_P (operands[2])
16529 && REGNO (operands[0]) == REGNO (operands[2])
16530 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16531 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16532 ; /* ok */
16533 else
16534 gcc_assert (is_sse);
16535 #endif
16537 switch (GET_CODE (operands[3]))
16539 case PLUS:
16540 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16541 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16542 p = "fiadd";
16543 else
16544 p = "fadd";
16545 ssep = "vadd";
16546 break;
16548 case MINUS:
16549 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16550 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16551 p = "fisub";
16552 else
16553 p = "fsub";
16554 ssep = "vsub";
16555 break;
16557 case MULT:
16558 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16559 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16560 p = "fimul";
16561 else
16562 p = "fmul";
16563 ssep = "vmul";
16564 break;
16566 case DIV:
16567 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16568 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16569 p = "fidiv";
16570 else
16571 p = "fdiv";
16572 ssep = "vdiv";
16573 break;
16575 default:
16576 gcc_unreachable ();
16579 if (is_sse)
16581 if (TARGET_AVX)
16583 strcpy (buf, ssep);
16584 if (GET_MODE (operands[0]) == SFmode)
16585 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16586 else
16587 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16589 else
16591 strcpy (buf, ssep + 1);
16592 if (GET_MODE (operands[0]) == SFmode)
16593 strcat (buf, "ss\t{%2, %0|%0, %2}");
16594 else
16595 strcat (buf, "sd\t{%2, %0|%0, %2}");
16597 return buf;
16599 strcpy (buf, p);
16601 switch (GET_CODE (operands[3]))
16603 case MULT:
16604 case PLUS:
16605 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16606 std::swap (operands[1], operands[2]);
16608 /* We now know that operands[0] == operands[1]. */
16610 if (MEM_P (operands[2]))
16612 p = "%Z2\t%2";
16613 break;
16616 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16618 if (STACK_TOP_P (operands[0]))
16619 /* How is it that we are storing to a dead operand[2]?
16620 Well, presumably operands[1] is dead too. We can't
16621 store the result to st(0) as st(0) gets popped on this
16622 instruction. Instead store to operands[2] (which I
16623 think has to be st(1)). st(1) will be popped later.
16624 gcc <= 2.8.1 didn't have this check and generated
16625 assembly code that the Unixware assembler rejected. */
16626 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16627 else
16628 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16629 break;
16632 if (STACK_TOP_P (operands[0]))
16633 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16634 else
16635 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16636 break;
16638 case MINUS:
16639 case DIV:
16640 if (MEM_P (operands[1]))
16642 p = "r%Z1\t%1";
16643 break;
16646 if (MEM_P (operands[2]))
16648 p = "%Z2\t%2";
16649 break;
16652 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16654 #if SYSV386_COMPAT
16655 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16656 derived assemblers, confusingly reverse the direction of
16657 the operation for fsub{r} and fdiv{r} when the
16658 destination register is not st(0). The Intel assembler
16659 doesn't have this brain damage. Read !SYSV386_COMPAT to
16660 figure out what the hardware really does. */
16661 if (STACK_TOP_P (operands[0]))
16662 p = "{p\t%0, %2|rp\t%2, %0}";
16663 else
16664 p = "{rp\t%2, %0|p\t%0, %2}";
16665 #else
16666 if (STACK_TOP_P (operands[0]))
16667 /* As above for fmul/fadd, we can't store to st(0). */
16668 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16669 else
16670 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16671 #endif
16672 break;
16675 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16677 #if SYSV386_COMPAT
16678 if (STACK_TOP_P (operands[0]))
16679 p = "{rp\t%0, %1|p\t%1, %0}";
16680 else
16681 p = "{p\t%1, %0|rp\t%0, %1}";
16682 #else
16683 if (STACK_TOP_P (operands[0]))
16684 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16685 else
16686 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16687 #endif
16688 break;
16691 if (STACK_TOP_P (operands[0]))
16693 if (STACK_TOP_P (operands[1]))
16694 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16695 else
16696 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16697 break;
16699 else if (STACK_TOP_P (operands[1]))
16701 #if SYSV386_COMPAT
16702 p = "{\t%1, %0|r\t%0, %1}";
16703 #else
16704 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16705 #endif
16707 else
16709 #if SYSV386_COMPAT
16710 p = "{r\t%2, %0|\t%0, %2}";
16711 #else
16712 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16713 #endif
16715 break;
16717 default:
16718 gcc_unreachable ();
16721 strcat (buf, p);
16722 return buf;
16725 /* Check if a 256bit AVX register is referenced inside of EXP. */
16727 static bool
16728 ix86_check_avx256_register (const_rtx exp)
16730 if (GET_CODE (exp) == SUBREG)
16731 exp = SUBREG_REG (exp);
16733 return (REG_P (exp)
16734 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16737 /* Return needed mode for entity in optimize_mode_switching pass. */
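 /* For the AVX_U128 entity the mode tracks whether the upper 128 bits of
    the ymm registers may contain live data (AVX_U128_DIRTY) or are known
    to be zero (AVX_U128_CLEAN).  The mode-switching pass uses this to
    place vzeroupper on DIRTY->CLEAN transitions, avoiding the AVX/SSE
    transition penalty that some processors impose.  */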
16739 static int
16740 ix86_avx_u128_mode_needed (rtx_insn *insn)
16742 if (CALL_P (insn))
16744 rtx link;
16746 /* Needed mode is set to AVX_U128_CLEAN if there are
16747 no 256bit modes used in function arguments. */
16748 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16749 link;
16750 link = XEXP (link, 1))
16752 if (GET_CODE (XEXP (link, 0)) == USE)
16754 rtx arg = XEXP (XEXP (link, 0), 0);
16756 if (ix86_check_avx256_register (arg))
16757 return AVX_U128_DIRTY;
16761 return AVX_U128_CLEAN;
16764 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16765 changes state only when a 256bit register is written to, but we need
16766 to prevent the compiler from moving the optimal insertion point above
16767 an eventual read from a 256bit register. */
16768 subrtx_iterator::array_type array;
16769 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16770 if (ix86_check_avx256_register (*iter))
16771 return AVX_U128_DIRTY;
16773 return AVX_U128_ANY;
16776 /* Return mode that i387 must be switched into
16777 prior to the execution of insn. */
16779 static int
16780 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16782 enum attr_i387_cw mode;
16784 /* The mode UNINITIALIZED is used to store the control word after a
16785 function call or ASM pattern. The mode ANY specifies that the function
16786 has no requirements on the control word and makes no changes to the
16787 bits we are interested in. */
16789 if (CALL_P (insn)
16790 || (NONJUMP_INSN_P (insn)
16791 && (asm_noperands (PATTERN (insn)) >= 0
16792 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16793 return I387_CW_UNINITIALIZED;
16795 if (recog_memoized (insn) < 0)
16796 return I387_CW_ANY;
16798 mode = get_attr_i387_cw (insn);
16800 switch (entity)
16802 case I387_TRUNC:
16803 if (mode == I387_CW_TRUNC)
16804 return mode;
16805 break;
16807 case I387_FLOOR:
16808 if (mode == I387_CW_FLOOR)
16809 return mode;
16810 break;
16812 case I387_CEIL:
16813 if (mode == I387_CW_CEIL)
16814 return mode;
16815 break;
16817 case I387_MASK_PM:
16818 if (mode == I387_CW_MASK_PM)
16819 return mode;
16820 break;
16822 default:
16823 gcc_unreachable ();
16826 return I387_CW_ANY;
16829 /* Return mode that entity must be switched into
16830 prior to the execution of insn. */
16832 static int
16833 ix86_mode_needed (int entity, rtx_insn *insn)
16835 switch (entity)
16837 case AVX_U128:
16838 return ix86_avx_u128_mode_needed (insn);
16839 case I387_TRUNC:
16840 case I387_FLOOR:
16841 case I387_CEIL:
16842 case I387_MASK_PM:
16843 return ix86_i387_mode_needed (entity, insn);
16844 default:
16845 gcc_unreachable ();
16847 return 0;
16850 /* Check if a 256bit AVX register is referenced in stores. */
16852 static void
16853 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16855 if (ix86_check_avx256_register (dest))
16857 bool *used = (bool *) data;
16858 *used = true;
16862 /* Calculate mode of upper 128bit AVX registers after the insn. */
16864 static int
16865 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16867 rtx pat = PATTERN (insn);
16869 if (vzeroupper_operation (pat, VOIDmode)
16870 || vzeroall_operation (pat, VOIDmode))
16871 return AVX_U128_CLEAN;
16873 /* We know that the state is clean after a CALL insn if no 256bit
16874 register is used for the function return value. */
16875 if (CALL_P (insn))
16877 bool avx_reg256_found = false;
16878 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16880 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16883 /* Otherwise, return current mode. Remember that if insn
16884 references AVX 256bit registers, the mode was already changed
16885 to DIRTY from MODE_NEEDED. */
16886 return mode;
16889 /* Return the mode that an insn results in. */
16891 static int
16892 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16894 switch (entity)
16896 case AVX_U128:
16897 return ix86_avx_u128_mode_after (mode, insn);
16898 case I387_TRUNC:
16899 case I387_FLOOR:
16900 case I387_CEIL:
16901 case I387_MASK_PM:
16902 return mode;
16903 default:
16904 gcc_unreachable ();
16908 static int
16909 ix86_avx_u128_mode_entry (void)
16911 tree arg;
16913 /* Entry mode is set to AVX_U128_DIRTY if there are
16914 256bit modes used in function arguments. */
16915 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16916 arg = TREE_CHAIN (arg))
16918 rtx incoming = DECL_INCOMING_RTL (arg);
16920 if (incoming && ix86_check_avx256_register (incoming))
16921 return AVX_U128_DIRTY;
16924 return AVX_U128_CLEAN;
16927 /* Return a mode that ENTITY is assumed to be
16928 switched to at function entry. */
16930 static int
16931 ix86_mode_entry (int entity)
16933 switch (entity)
16935 case AVX_U128:
16936 return ix86_avx_u128_mode_entry ();
16937 case I387_TRUNC:
16938 case I387_FLOOR:
16939 case I387_CEIL:
16940 case I387_MASK_PM:
16941 return I387_CW_ANY;
16942 default:
16943 gcc_unreachable ();
16947 static int
16948 ix86_avx_u128_mode_exit (void)
16950 rtx reg = crtl->return_rtx;
16952 /* Exit mode is set to AVX_U128_DIRTY if there are
16953 256bit modes used in the function return register. */
16954 if (reg && ix86_check_avx256_register (reg))
16955 return AVX_U128_DIRTY;
16957 return AVX_U128_CLEAN;
16960 /* Return a mode that ENTITY is assumed to be
16961 switched to at function exit. */
16963 static int
16964 ix86_mode_exit (int entity)
16966 switch (entity)
16968 case AVX_U128:
16969 return ix86_avx_u128_mode_exit ();
16970 case I387_TRUNC:
16971 case I387_FLOOR:
16972 case I387_CEIL:
16973 case I387_MASK_PM:
16974 return I387_CW_ANY;
16975 default:
16976 gcc_unreachable ();
16980 static int
16981 ix86_mode_priority (int, int n)
16983 return n;
16986 /* Output code to initialize the control word copies used by the trunc?f?i
16987 and rounding patterns.  The current control word is read into a stack slot,
16988 a copy is adjusted according to MODE and stored into the slot for MODE. */
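 /* For reference, the rounding-control field is bits 10-11 of the x87
    control word (mask 0x0c00): 00 = to nearest, 01 = down (toward -inf),
    10 = up (toward +inf), 11 = toward zero (truncate).  Bit 5 (0x0020) is
    the precision-exception mask used for nearbyint.  This is where the
    0x0c00, 0x0400, 0x0800 and 0x0020 constants below come from.  */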
16990 static void
16991 emit_i387_cw_initialization (int mode)
16993 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16994 rtx new_mode;
16996 enum ix86_stack_slot slot;
16998 rtx reg = gen_reg_rtx (HImode);
17000 emit_insn (gen_x86_fnstcw_1 (stored_mode));
17001 emit_move_insn (reg, copy_rtx (stored_mode));
17003 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
17004 || optimize_insn_for_size_p ())
17006 switch (mode)
17008 case I387_CW_TRUNC:
17009 /* round toward zero (truncate) */
17010 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
17011 slot = SLOT_CW_TRUNC;
17012 break;
17014 case I387_CW_FLOOR:
17015 /* round down toward -oo */
17016 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17017 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
17018 slot = SLOT_CW_FLOOR;
17019 break;
17021 case I387_CW_CEIL:
17022 /* round up toward +oo */
17023 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17024 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
17025 slot = SLOT_CW_CEIL;
17026 break;
17028 case I387_CW_MASK_PM:
17029 /* mask precision exception for nearbyint() */
17030 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17031 slot = SLOT_CW_MASK_PM;
17032 break;
17034 default:
17035 gcc_unreachable ();
17038 else
17040 switch (mode)
17042 case I387_CW_TRUNC:
17043 /* round toward zero (truncate) */
17044 emit_insn (gen_insvsi_1 (reg, GEN_INT (0xc)));
17045 slot = SLOT_CW_TRUNC;
17046 break;
17048 case I387_CW_FLOOR:
17049 /* round down toward -oo */
17050 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x4)));
17051 slot = SLOT_CW_FLOOR;
17052 break;
17054 case I387_CW_CEIL:
17055 /* round up toward +oo */
17056 emit_insn (gen_insvsi_1 (reg, GEN_INT (0x8)));
17057 slot = SLOT_CW_CEIL;
17058 break;
17060 case I387_CW_MASK_PM:
17061 /* mask precision exception for nearbyint() */
17062 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17063 slot = SLOT_CW_MASK_PM;
17064 break;
17066 default:
17067 gcc_unreachable ();
17071 gcc_assert (slot < MAX_386_STACK_LOCALS);
17073 new_mode = assign_386_stack_local (HImode, slot);
17074 emit_move_insn (new_mode, reg);
17077 /* Emit vzeroupper. */
17079 void
17080 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
17082 int i;
17084 /* Cancel automatic vzeroupper insertion if there are
17085 live call-saved SSE registers at the insertion point. */
17087 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17088 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17089 return;
17091 if (TARGET_64BIT)
17092 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17093 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17094 return;
17096 emit_insn (gen_avx_vzeroupper ());
17101 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
17102 is the set of hard registers live at the point where the insn(s)
17103 are to be inserted. */
17105 static void
17106 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17107 HARD_REG_SET regs_live)
17109 switch (entity)
17111 case AVX_U128:
17112 if (mode == AVX_U128_CLEAN)
17113 ix86_avx_emit_vzeroupper (regs_live);
17114 break;
17115 case I387_TRUNC:
17116 case I387_FLOOR:
17117 case I387_CEIL:
17118 case I387_MASK_PM:
17119 if (mode != I387_CW_ANY
17120 && mode != I387_CW_UNINITIALIZED)
17121 emit_i387_cw_initialization (mode);
17122 break;
17123 default:
17124 gcc_unreachable ();
17128 /* Output code for INSN to convert a float to a signed int. OPERANDS
17129 are the insn operands. The output may be [HSD]Imode and the input
17130 operand may be [SDX]Fmode. */
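 /* A rough sketch of what gets emitted in the non-fisttp case when the
    rounding mode must be changed:
        fld    %y1        (only if the popping store must not kill the input)
        fldcw  %3         (load the truncating control word)
        fistp  %0         (or fist %0 when the value is still needed)
        fldcw  %2         (restore the original control word)
    The fisttp path emits a single fisttp and needs no control-word change.  */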
17132 const char *
17133 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17135 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17136 int dimode_p = GET_MODE (operands[0]) == DImode;
17137 int round_mode = get_attr_i387_cw (insn);
17139 /* Jump through a hoop or two for DImode, since the hardware has no
17140 non-popping instruction. We used to do this a different way, but
17141 that was somewhat fragile and broke with post-reload splitters. */
17142 if ((dimode_p || fisttp) && !stack_top_dies)
17143 output_asm_insn ("fld\t%y1", operands);
17145 gcc_assert (STACK_TOP_P (operands[1]));
17146 gcc_assert (MEM_P (operands[0]));
17147 gcc_assert (GET_MODE (operands[1]) != TFmode);
17149 if (fisttp)
17150 output_asm_insn ("fisttp%Z0\t%0", operands);
17151 else
17153 if (round_mode != I387_CW_ANY)
17154 output_asm_insn ("fldcw\t%3", operands);
17155 if (stack_top_dies || dimode_p)
17156 output_asm_insn ("fistp%Z0\t%0", operands);
17157 else
17158 output_asm_insn ("fist%Z0\t%0", operands);
17159 if (round_mode != I387_CW_ANY)
17160 output_asm_insn ("fldcw\t%2", operands);
17163 return "";
17166 /* Output code for x87 ffreep insn. The OPNO argument, which may only
17167 have the values zero or one, indicates the ffreep insn's operand
17168 from the OPERANDS array. */
17170 static const char *
17171 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17173 if (TARGET_USE_FFREEP)
17174 #ifdef HAVE_AS_IX86_FFREEP
17175 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17176 #else
17178 static char retval[32];
17179 int regno = REGNO (operands[opno]);
17181 gcc_assert (STACK_REGNO_P (regno));
17183 regno -= FIRST_STACK_REG;
17185 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17186 return retval;
17188 #endif
17190 return opno ? "fstp\t%y1" : "fstp\t%y0";
17194 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
17195 should be used. UNORDERED_P is true when fucom should be used. */
17197 const char *
17198 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17200 int stack_top_dies;
17201 rtx cmp_op0, cmp_op1;
17202 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17204 if (eflags_p)
17206 cmp_op0 = operands[0];
17207 cmp_op1 = operands[1];
17209 else
17211 cmp_op0 = operands[1];
17212 cmp_op1 = operands[2];
17215 if (is_sse)
17217 if (GET_MODE (operands[0]) == SFmode)
17218 if (unordered_p)
17219 return "%vucomiss\t{%1, %0|%0, %1}";
17220 else
17221 return "%vcomiss\t{%1, %0|%0, %1}";
17222 else
17223 if (unordered_p)
17224 return "%vucomisd\t{%1, %0|%0, %1}";
17225 else
17226 return "%vcomisd\t{%1, %0|%0, %1}";
17229 gcc_assert (STACK_TOP_P (cmp_op0));
17231 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17233 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17235 if (stack_top_dies)
17237 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17238 return output_387_ffreep (operands, 1);
17240 else
17241 return "ftst\n\tfnstsw\t%0";
17244 if (STACK_REG_P (cmp_op1)
17245 && stack_top_dies
17246 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17247 && REGNO (cmp_op1) != FIRST_STACK_REG)
17249 /* If the top of the 387 stack dies, and the other operand is
17250 also a stack register that dies, then this must be an
17251 `fcompp' float compare. */
17253 if (eflags_p)
17255 /* There is no double popping fcomi variant. Fortunately,
17256 eflags is immune from the fstp's cc clobbering. */
17257 if (unordered_p)
17258 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17259 else
17260 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17261 return output_387_ffreep (operands, 0);
17263 else
17265 if (unordered_p)
17266 return "fucompp\n\tfnstsw\t%0";
17267 else
17268 return "fcompp\n\tfnstsw\t%0";
17271 else
17273 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
17275 static const char * const alt[16] =
17277 "fcom%Z2\t%y2\n\tfnstsw\t%0",
17278 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
17279 "fucom%Z2\t%y2\n\tfnstsw\t%0",
17280 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
17282 "ficom%Z2\t%y2\n\tfnstsw\t%0",
17283 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
17284 NULL,
17285 NULL,
17287 "fcomi\t{%y1, %0|%0, %y1}",
17288 "fcomip\t{%y1, %0|%0, %y1}",
17289 "fucomi\t{%y1, %0|%0, %y1}",
17290 "fucomip\t{%y1, %0|%0, %y1}",
17292 NULL,
17293 NULL,
17294 NULL,
17295 NULL
17298 int mask;
17299 const char *ret;
17301 mask = eflags_p << 3;
17302 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17303 mask |= unordered_p << 1;
17304 mask |= stack_top_dies;
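 /* For example, eflags_p == 1 with a floating-point cmp_op1, unordered_p
    == 1 and a dying top-of-stack gives mask = 8 + 0 + 2 + 1 = 11, which
    selects "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */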
17306 gcc_assert (mask < 16);
17307 ret = alt[mask];
17308 gcc_assert (ret);
17310 return ret;
17314 void
17315 ix86_output_addr_vec_elt (FILE *file, int value)
17317 const char *directive = ASM_LONG;
17319 #ifdef ASM_QUAD
17320 if (TARGET_LP64)
17321 directive = ASM_QUAD;
17322 #else
17323 gcc_assert (!TARGET_64BIT);
17324 #endif
17326 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17329 void
17330 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17332 const char *directive = ASM_LONG;
17334 #ifdef ASM_QUAD
17335 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17336 directive = ASM_QUAD;
17337 #else
17338 gcc_assert (!TARGET_64BIT);
17339 #endif
17340 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
17341 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17342 fprintf (file, "%s%s%d-%s%d\n",
17343 directive, LPREFIX, value, LPREFIX, rel);
17344 else if (HAVE_AS_GOTOFF_IN_DATA)
17345 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17346 #if TARGET_MACHO
17347 else if (TARGET_MACHO)
17349 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17350 machopic_output_function_base_name (file);
17351 putc ('\n', file);
17353 #endif
17354 else
17355 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17356 GOT_SYMBOL_NAME, LPREFIX, value);
17359 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17360 for the target. */
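 /* The xor form is shorter and breaks dependencies but clobbers the flags,
    which is why the SET below is wrapped in a PARALLEL with a CLOBBER of
    FLAGS_REG; the mov $0 form is used only when TARGET_USE_MOV0 is set and
    we are not optimizing this insn for size.  */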
17362 void
17363 ix86_expand_clear (rtx dest)
17365 rtx tmp;
17367 /* We play register width games, which are only valid after reload. */
17368 gcc_assert (reload_completed);
17370 /* Avoid HImode and its attendant prefix byte. */
17371 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17372 dest = gen_rtx_REG (SImode, REGNO (dest));
17373 tmp = gen_rtx_SET (dest, const0_rtx);
17375 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17377 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17378 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17381 emit_insn (tmp);
17384 /* X is an unchanging MEM. If it is a constant pool reference, return
17385 the constant pool rtx, else NULL. */
17388 maybe_get_pool_constant (rtx x)
17390 x = ix86_delegitimize_address (XEXP (x, 0));
17392 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17393 return get_pool_constant (x);
17395 return NULL_RTX;
17398 void
17399 ix86_expand_move (machine_mode mode, rtx operands[])
17401 rtx op0, op1;
17402 enum tls_model model;
17404 op0 = operands[0];
17405 op1 = operands[1];
17407 if (GET_CODE (op1) == SYMBOL_REF)
17409 rtx tmp;
17411 model = SYMBOL_REF_TLS_MODEL (op1);
17412 if (model)
17414 op1 = legitimize_tls_address (op1, model, true);
17415 op1 = force_operand (op1, op0);
17416 if (op1 == op0)
17417 return;
17418 op1 = convert_to_mode (mode, op1, 1);
17420 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17421 op1 = tmp;
17423 else if (GET_CODE (op1) == CONST
17424 && GET_CODE (XEXP (op1, 0)) == PLUS
17425 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17427 rtx addend = XEXP (XEXP (op1, 0), 1);
17428 rtx symbol = XEXP (XEXP (op1, 0), 0);
17429 rtx tmp;
17431 model = SYMBOL_REF_TLS_MODEL (symbol);
17432 if (model)
17433 tmp = legitimize_tls_address (symbol, model, true);
17434 else
17435 tmp = legitimize_pe_coff_symbol (symbol, true);
17437 if (tmp)
17439 tmp = force_operand (tmp, NULL);
17440 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17441 op0, 1, OPTAB_DIRECT);
17442 if (tmp == op0)
17443 return;
17444 op1 = convert_to_mode (mode, tmp, 1);
17448 if ((flag_pic || MACHOPIC_INDIRECT)
17449 && symbolic_operand (op1, mode))
17451 if (TARGET_MACHO && !TARGET_64BIT)
17453 #if TARGET_MACHO
17454 /* dynamic-no-pic */
17455 if (MACHOPIC_INDIRECT)
17457 rtx temp = (op0 && REG_P (op0) && mode == Pmode)
17458 ? op0 : gen_reg_rtx (Pmode);
17459 op1 = machopic_indirect_data_reference (op1, temp);
17460 if (MACHOPIC_PURE)
17461 op1 = machopic_legitimize_pic_address (op1, mode,
17462 temp == op1 ? 0 : temp);
17464 if (op0 != op1 && GET_CODE (op0) != MEM)
17466 rtx insn = gen_rtx_SET (op0, op1);
17467 emit_insn (insn);
17468 return;
17470 if (GET_CODE (op0) == MEM)
17471 op1 = force_reg (Pmode, op1);
17472 else
17474 rtx temp = op0;
17475 if (GET_CODE (temp) != REG)
17476 temp = gen_reg_rtx (Pmode);
17477 temp = legitimize_pic_address (op1, temp);
17478 if (temp == op0)
17479 return;
17480 op1 = temp;
17482 /* dynamic-no-pic */
17483 #endif
17485 else
17487 if (MEM_P (op0))
17488 op1 = force_reg (mode, op1);
17489 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17491 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17492 op1 = legitimize_pic_address (op1, reg);
17493 if (op0 == op1)
17494 return;
17495 op1 = convert_to_mode (mode, op1, 1);
17499 else
17501 if (MEM_P (op0)
17502 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17503 || !push_operand (op0, mode))
17504 && MEM_P (op1))
17505 op1 = force_reg (mode, op1);
17507 if (push_operand (op0, mode)
17508 && ! general_no_elim_operand (op1, mode))
17509 op1 = copy_to_mode_reg (mode, op1);
17511 /* Force large constants in 64bit compilation into register
17512 to get them CSEed. */
17513 if (can_create_pseudo_p ()
17514 && (mode == DImode) && TARGET_64BIT
17515 && immediate_operand (op1, mode)
17516 && !x86_64_zext_immediate_operand (op1, VOIDmode)
17517 && !register_operand (op0, mode)
17518 && optimize)
17519 op1 = copy_to_mode_reg (mode, op1);
17521 if (can_create_pseudo_p ()
17522 && CONST_DOUBLE_P (op1))
17524 /* If we are loading a floating point constant to a register,
17525 force the value to memory now, since we'll get better code
17526 out the back end. */
17528 op1 = validize_mem (force_const_mem (mode, op1));
17529 if (!register_operand (op0, mode))
17531 rtx temp = gen_reg_rtx (mode);
17532 emit_insn (gen_rtx_SET (temp, op1));
17533 emit_move_insn (op0, temp);
17534 return;
17539 emit_insn (gen_rtx_SET (op0, op1));
17542 void
17543 ix86_expand_vector_move (machine_mode mode, rtx operands[])
17545 rtx op0 = operands[0], op1 = operands[1];
17546 unsigned int align = GET_MODE_ALIGNMENT (mode);
17548 if (push_operand (op0, VOIDmode))
17549 op0 = emit_move_resolve_push (mode, op0);
17551 /* Force constants other than zero into memory. We do not know how
17552 the instructions used to build constants modify the upper 64 bits
17553 of the register; once we have that information we may be able
17554 to handle some of them more efficiently. */
17555 if (can_create_pseudo_p ()
17556 && register_operand (op0, mode)
17557 && (CONSTANT_P (op1)
17558 || (GET_CODE (op1) == SUBREG
17559 && CONSTANT_P (SUBREG_REG (op1))))
17560 && !standard_sse_constant_p (op1))
17561 op1 = validize_mem (force_const_mem (mode, op1));
17563 /* We need to check memory alignment for SSE mode since an attribute
17564 can make operands unaligned. */
17565 if (can_create_pseudo_p ()
17566 && SSE_REG_MODE_P (mode)
17567 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17568 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17570 rtx tmp[2];
17572 /* ix86_expand_vector_move_misalign() does not like constants ... */
17573 if (CONSTANT_P (op1)
17574 || (GET_CODE (op1) == SUBREG
17575 && CONSTANT_P (SUBREG_REG (op1))))
17576 op1 = validize_mem (force_const_mem (mode, op1));
17578 /* ... nor both arguments in memory. */
17579 if (!register_operand (op0, mode)
17580 && !register_operand (op1, mode))
17581 op1 = force_reg (mode, op1);
17583 tmp[0] = op0; tmp[1] = op1;
17584 ix86_expand_vector_move_misalign (mode, tmp);
17585 return;
17588 /* Make operand1 a register if it isn't already. */
17589 if (can_create_pseudo_p ()
17590 && !register_operand (op0, mode)
17591 && !register_operand (op1, mode))
17593 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17594 return;
17597 emit_insn (gen_rtx_SET (op0, op1));
17600 /* Split 32-byte AVX unaligned load and store if needed. */
17602 static void
17603 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17605 rtx m;
17606 rtx (*extract) (rtx, rtx, rtx);
17607 rtx (*load_unaligned) (rtx, rtx);
17608 rtx (*store_unaligned) (rtx, rtx);
17609 machine_mode mode;
17611 switch (GET_MODE (op0))
17613 default:
17614 gcc_unreachable ();
17615 case V32QImode:
17616 extract = gen_avx_vextractf128v32qi;
17617 load_unaligned = gen_avx_loaddquv32qi;
17618 store_unaligned = gen_avx_storedquv32qi;
17619 mode = V16QImode;
17620 break;
17621 case V8SFmode:
17622 extract = gen_avx_vextractf128v8sf;
17623 load_unaligned = gen_avx_loadups256;
17624 store_unaligned = gen_avx_storeups256;
17625 mode = V4SFmode;
17626 break;
17627 case V4DFmode:
17628 extract = gen_avx_vextractf128v4df;
17629 load_unaligned = gen_avx_loadupd256;
17630 store_unaligned = gen_avx_storeupd256;
17631 mode = V2DFmode;
17632 break;
17635 if (MEM_P (op1))
17637 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17638 && optimize_insn_for_speed_p ())
17640 rtx r = gen_reg_rtx (mode);
17641 m = adjust_address (op1, mode, 0);
17642 emit_move_insn (r, m);
17643 m = adjust_address (op1, mode, 16);
17644 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17645 emit_move_insn (op0, r);
17647 /* Normal *mov<mode>_internal pattern will handle
17648 unaligned loads just fine if misaligned_operand
17649 is true, and without the UNSPEC it can be combined
17650 with arithmetic instructions. */
17651 else if (misaligned_operand (op1, GET_MODE (op1)))
17652 emit_insn (gen_rtx_SET (op0, op1));
17653 else
17654 emit_insn (load_unaligned (op0, op1));
17656 else if (MEM_P (op0))
17658 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17659 && optimize_insn_for_speed_p ())
17661 m = adjust_address (op0, mode, 0);
17662 emit_insn (extract (m, op1, const0_rtx));
17663 m = adjust_address (op0, mode, 16);
17664 emit_insn (extract (m, op1, const1_rtx));
17666 else
17667 emit_insn (store_unaligned (op0, op1));
17669 else
17670 gcc_unreachable ();
17673 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17674 straight to ix86_expand_vector_move. */
17675 /* Code generation for scalar reg-reg moves of single and double precision data:
17676 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17677 movaps reg, reg
17678 else
17679 movss reg, reg
17680 if (x86_sse_partial_reg_dependency == true)
17681 movapd reg, reg
17682 else
17683 movsd reg, reg
17685 Code generation for scalar loads of double precision data:
17686 if (x86_sse_split_regs == true)
17687 movlpd mem, reg (gas syntax)
17688 else
17689 movsd mem, reg
17691 Code generation for unaligned packed loads of single precision data
17692 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17693 if (x86_sse_unaligned_move_optimal)
17694 movups mem, reg
17696 if (x86_sse_partial_reg_dependency == true)
17698 xorps reg, reg
17699 movlps mem, reg
17700 movhps mem+8, reg
17702 else
17704 movlps mem, reg
17705 movhps mem+8, reg
17708 Code generation for unaligned packed loads of double precision data
17709 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17710 if (x86_sse_unaligned_move_optimal)
17711 movupd mem, reg
17713 if (x86_sse_split_regs == true)
17715 movlpd mem, reg
17716 movhpd mem+8, reg
17718 else
17720 movsd mem, reg
17721 movhpd mem+8, reg
17725 void
17726 ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17728 rtx op0, op1, orig_op0 = NULL_RTX, m;
17729 rtx (*load_unaligned) (rtx, rtx);
17730 rtx (*store_unaligned) (rtx, rtx);
17732 op0 = operands[0];
17733 op1 = operands[1];
17735 if (GET_MODE_SIZE (mode) == 64)
17737 switch (GET_MODE_CLASS (mode))
17739 case MODE_VECTOR_INT:
17740 case MODE_INT:
17741 if (GET_MODE (op0) != V16SImode)
17743 if (!MEM_P (op0))
17745 orig_op0 = op0;
17746 op0 = gen_reg_rtx (V16SImode);
17748 else
17749 op0 = gen_lowpart (V16SImode, op0);
17751 op1 = gen_lowpart (V16SImode, op1);
17752 /* FALLTHRU */
17754 case MODE_VECTOR_FLOAT:
17755 switch (GET_MODE (op0))
17757 default:
17758 gcc_unreachable ();
17759 case V16SImode:
17760 load_unaligned = gen_avx512f_loaddquv16si;
17761 store_unaligned = gen_avx512f_storedquv16si;
17762 break;
17763 case V16SFmode:
17764 load_unaligned = gen_avx512f_loadups512;
17765 store_unaligned = gen_avx512f_storeups512;
17766 break;
17767 case V8DFmode:
17768 load_unaligned = gen_avx512f_loadupd512;
17769 store_unaligned = gen_avx512f_storeupd512;
17770 break;
17773 if (MEM_P (op1))
17774 emit_insn (load_unaligned (op0, op1));
17775 else if (MEM_P (op0))
17776 emit_insn (store_unaligned (op0, op1));
17777 else
17778 gcc_unreachable ();
17779 if (orig_op0)
17780 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17781 break;
17783 default:
17784 gcc_unreachable ();
17787 return;
17790 if (TARGET_AVX
17791 && GET_MODE_SIZE (mode) == 32)
17793 switch (GET_MODE_CLASS (mode))
17795 case MODE_VECTOR_INT:
17796 case MODE_INT:
17797 if (GET_MODE (op0) != V32QImode)
17799 if (!MEM_P (op0))
17801 orig_op0 = op0;
17802 op0 = gen_reg_rtx (V32QImode);
17804 else
17805 op0 = gen_lowpart (V32QImode, op0);
17807 op1 = gen_lowpart (V32QImode, op1);
17808 /* FALLTHRU */
17810 case MODE_VECTOR_FLOAT:
17811 ix86_avx256_split_vector_move_misalign (op0, op1);
17812 if (orig_op0)
17813 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17814 break;
17816 default:
17817 gcc_unreachable ();
17820 return;
17823 if (MEM_P (op1))
17825 /* Normal *mov<mode>_internal pattern will handle
17826 unaligned loads just fine if misaligned_operand
17827 is true, and without the UNSPEC it can be combined
17828 with arithmetic instructions. */
17829 if (TARGET_AVX
17830 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17831 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17832 && misaligned_operand (op1, GET_MODE (op1)))
17833 emit_insn (gen_rtx_SET (op0, op1));
17834 /* ??? If we have typed data, then it would appear that using
17835 movdqu is the only way to get unaligned data loaded with
17836 integer type. */
17837 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17839 if (GET_MODE (op0) != V16QImode)
17841 orig_op0 = op0;
17842 op0 = gen_reg_rtx (V16QImode);
17844 op1 = gen_lowpart (V16QImode, op1);
17845 /* We will eventually emit movups based on insn attributes. */
17846 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17847 if (orig_op0)
17848 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17850 else if (TARGET_SSE2 && mode == V2DFmode)
17852 rtx zero;
17854 if (TARGET_AVX
17855 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17856 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17857 || optimize_insn_for_size_p ())
17859 /* We will eventually emit movups based on insn attributes. */
17860 emit_insn (gen_sse2_loadupd (op0, op1));
17861 return;
17864 /* When SSE registers are split into halves, we can avoid
17865 writing to the top half twice. */
17866 if (TARGET_SSE_SPLIT_REGS)
17868 emit_clobber (op0);
17869 zero = op0;
17871 else
17873 /* ??? Not sure about the best option for the Intel chips.
17874 The following would seem to satisfy; the register is
17875 entirely cleared, breaking the dependency chain. We
17876 then store to the upper half, with a dependency depth
17877 of one. A rumor has it that Intel recommends two movsd
17878 followed by an unpacklpd, but this is unconfirmed. And
17879 given that the dependency depth of the unpacklpd would
17880 still be one, I'm not sure why this would be better. */
17881 zero = CONST0_RTX (V2DFmode);
17884 m = adjust_address (op1, DFmode, 0);
17885 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17886 m = adjust_address (op1, DFmode, 8);
17887 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17889 else
17891 rtx t;
17893 if (TARGET_AVX
17894 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17895 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17896 || optimize_insn_for_size_p ())
17898 if (GET_MODE (op0) != V4SFmode)
17900 orig_op0 = op0;
17901 op0 = gen_reg_rtx (V4SFmode);
17903 op1 = gen_lowpart (V4SFmode, op1);
17904 emit_insn (gen_sse_loadups (op0, op1));
17905 if (orig_op0)
17906 emit_move_insn (orig_op0,
17907 gen_lowpart (GET_MODE (orig_op0), op0));
17908 return;
17911 if (mode != V4SFmode)
17912 t = gen_reg_rtx (V4SFmode);
17913 else
17914 t = op0;
17916 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17917 emit_move_insn (t, CONST0_RTX (V4SFmode));
17918 else
17919 emit_clobber (t);
17921 m = adjust_address (op1, V2SFmode, 0);
17922 emit_insn (gen_sse_loadlps (t, t, m));
17923 m = adjust_address (op1, V2SFmode, 8);
17924 emit_insn (gen_sse_loadhps (t, t, m));
17925 if (mode != V4SFmode)
17926 emit_move_insn (op0, gen_lowpart (mode, t));
17929 else if (MEM_P (op0))
17931 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17933 op0 = gen_lowpart (V16QImode, op0);
17934 op1 = gen_lowpart (V16QImode, op1);
17935 /* We will eventually emit movups based on insn attributes. */
17936 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17938 else if (TARGET_SSE2 && mode == V2DFmode)
17940 if (TARGET_AVX
17941 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17942 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17943 || optimize_insn_for_size_p ())
17944 /* We will eventually emit movups based on insn attributes. */
17945 emit_insn (gen_sse2_storeupd (op0, op1));
17946 else
17948 m = adjust_address (op0, DFmode, 0);
17949 emit_insn (gen_sse2_storelpd (m, op1));
17950 m = adjust_address (op0, DFmode, 8);
17951 emit_insn (gen_sse2_storehpd (m, op1));
17954 else
17956 if (mode != V4SFmode)
17957 op1 = gen_lowpart (V4SFmode, op1);
17959 if (TARGET_AVX
17960 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17961 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17962 || optimize_insn_for_size_p ())
17964 op0 = gen_lowpart (V4SFmode, op0);
17965 emit_insn (gen_sse_storeups (op0, op1));
17967 else
17969 m = adjust_address (op0, V2SFmode, 0);
17970 emit_insn (gen_sse_storelps (m, op1));
17971 m = adjust_address (op0, V2SFmode, 8);
17972 emit_insn (gen_sse_storehps (m, op1));
17976 else
17977 gcc_unreachable ();
17980 /* Helper function of ix86_fixup_binary_operands to canonicalize
17981 operand order. Returns true if the operands should be swapped. */
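 /* For example, a commutative PLUS with dst equal to src2, as in
    (set (reg A) (plus (mem B) (reg A))), returns true so that after the
    swap src1 matches dst, as the two-address x86 forms require.  */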
17983 static bool
17984 ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
17985 rtx operands[])
17987 rtx dst = operands[0];
17988 rtx src1 = operands[1];
17989 rtx src2 = operands[2];
17991 /* If the operation is not commutative, we can't do anything. */
17992 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17993 return false;
17995 /* Highest priority is that src1 should match dst. */
17996 if (rtx_equal_p (dst, src1))
17997 return false;
17998 if (rtx_equal_p (dst, src2))
17999 return true;
18001 /* Next highest priority is that immediate constants come second. */
18002 if (immediate_operand (src2, mode))
18003 return false;
18004 if (immediate_operand (src1, mode))
18005 return true;
18007 /* Lowest priority is that memory references should come second. */
18008 if (MEM_P (src2))
18009 return false;
18010 if (MEM_P (src1))
18011 return true;
18013 return false;
18017 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
18018 destination to use for the operation. If different from the true
18019 destination in operands[0], a copy operation will be required. */
18022 ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
18023 rtx operands[])
18025 rtx dst = operands[0];
18026 rtx src1 = operands[1];
18027 rtx src2 = operands[2];
18029 /* Canonicalize operand order. */
18030 if (ix86_swap_binary_operands_p (code, mode, operands))
18032 /* It is invalid to swap operands of different modes. */
18033 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
18035 std::swap (src1, src2);
18038 /* Both source operands cannot be in memory. */
18039 if (MEM_P (src1) && MEM_P (src2))
18041 /* Optimization: Only read from memory once. */
18042 if (rtx_equal_p (src1, src2))
18044 src2 = force_reg (mode, src2);
18045 src1 = src2;
18047 else if (rtx_equal_p (dst, src1))
18048 src2 = force_reg (mode, src2);
18049 else
18050 src1 = force_reg (mode, src1);
18053 /* If the destination is memory, and we do not have matching source
18054 operands, do things in registers. */
18055 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18056 dst = gen_reg_rtx (mode);
18058 /* Source 1 cannot be a constant. */
18059 if (CONSTANT_P (src1))
18060 src1 = force_reg (mode, src1);
18062 /* Source 1 cannot be a non-matching memory. */
18063 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18064 src1 = force_reg (mode, src1);
18066 /* Improve address combine. */
18067 if (code == PLUS
18068 && GET_MODE_CLASS (mode) == MODE_INT
18069 && MEM_P (src2))
18070 src2 = force_reg (mode, src2);
18072 operands[1] = src1;
18073 operands[2] = src2;
18074 return dst;
18077 /* Similarly, but assume that the destination has already been
18078 set up properly. */
18080 void
18081 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
18082 machine_mode mode, rtx operands[])
18084 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18085 gcc_assert (dst == operands[0]);
18088 /* Attempt to expand a binary operator. Make the expansion closer to the
18089 actual machine than just general_operand, which would allow 3 separate
18090 memory references (one output, two input) in a single insn. */
18092 void
18093 ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18094 rtx operands[])
18096 rtx src1, src2, dst, op, clob;
18098 dst = ix86_fixup_binary_operands (code, mode, operands);
18099 src1 = operands[1];
18100 src2 = operands[2];
18102 /* Emit the instruction. */
18104 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18106 if (reload_completed
18107 && code == PLUS
18108 && !rtx_equal_p (dst, src1))
18110 /* This is going to be an LEA; avoid splitting it later. */
18111 emit_insn (op);
18113 else
18115 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18116 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18119 /* Fix up the destination if needed. */
18120 if (dst != operands[0])
18121 emit_move_insn (operands[0], dst);
18124 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18125 the given OPERANDS. */
18127 void
18128 ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18129 rtx operands[])
18131 rtx op1 = NULL_RTX, op2 = NULL_RTX;
18132 if (GET_CODE (operands[1]) == SUBREG)
18134 op1 = operands[1];
18135 op2 = operands[2];
18137 else if (GET_CODE (operands[2]) == SUBREG)
18139 op1 = operands[2];
18140 op2 = operands[1];
18142 /* Optimize (__m128i) d | (__m128i) e and similar code, where d and e
18143 are float vectors, into a float vector logical insn. In C/C++, without
18144 using intrinsics, there is no other way to express a vector logical
18145 operation on float vectors than to cast them temporarily to integer
18146 vectors. */
18147 if (op1
18148 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18149 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18150 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18151 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18152 && SUBREG_BYTE (op1) == 0
18153 && (GET_CODE (op2) == CONST_VECTOR
18154 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18155 && SUBREG_BYTE (op2) == 0))
18156 && can_create_pseudo_p ())
18158 rtx dst;
18159 switch (GET_MODE (SUBREG_REG (op1)))
18161 case V4SFmode:
18162 case V8SFmode:
18163 case V16SFmode:
18164 case V2DFmode:
18165 case V4DFmode:
18166 case V8DFmode:
18167 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18168 if (GET_CODE (op2) == CONST_VECTOR)
18170 op2 = gen_lowpart (GET_MODE (dst), op2);
18171 op2 = force_reg (GET_MODE (dst), op2);
18173 else
18175 op1 = operands[1];
18176 op2 = SUBREG_REG (operands[2]);
18177 if (!nonimmediate_operand (op2, GET_MODE (dst)))
18178 op2 = force_reg (GET_MODE (dst), op2);
18180 op1 = SUBREG_REG (op1);
18181 if (!nonimmediate_operand (op1, GET_MODE (dst)))
18182 op1 = force_reg (GET_MODE (dst), op1);
18183 emit_insn (gen_rtx_SET (dst,
18184 gen_rtx_fmt_ee (code, GET_MODE (dst),
18185 op1, op2)));
18186 emit_move_insn (operands[0], gen_lowpart (mode, dst));
18187 return;
18188 default:
18189 break;
18192 if (!nonimmediate_operand (operands[1], mode))
18193 operands[1] = force_reg (mode, operands[1]);
18194 if (!nonimmediate_operand (operands[2], mode))
18195 operands[2] = force_reg (mode, operands[2]);
18196 ix86_fixup_binary_operands_no_copy (code, mode, operands);
18197 emit_insn (gen_rtx_SET (operands[0],
18198 gen_rtx_fmt_ee (code, mode, operands[1],
18199 operands[2])));
18202 /* Return TRUE or FALSE depending on whether the binary operator meets the
18203 appropriate constraints. */
18205 bool
18206 ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18207 rtx operands[3])
18209 rtx dst = operands[0];
18210 rtx src1 = operands[1];
18211 rtx src2 = operands[2];
18213 /* Both source operands cannot be in memory. */
18214 if (MEM_P (src1) && MEM_P (src2))
18215 return false;
18217 /* Canonicalize operand order for commutative operators. */
18218 if (ix86_swap_binary_operands_p (code, mode, operands))
18219 std::swap (src1, src2);
18221 /* If the destination is memory, we must have a matching source operand. */
18222 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18223 return false;
18225 /* Source 1 cannot be a constant. */
18226 if (CONSTANT_P (src1))
18227 return false;
18229 /* Source 1 cannot be a non-matching memory. */
18230 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18231 /* Support "andhi/andsi/anddi" as a zero-extending move. */
18232 return (code == AND
18233 && (mode == HImode
18234 || mode == SImode
18235 || (TARGET_64BIT && mode == DImode))
18236 && satisfies_constraint_L (src2));
18238 return true;
18241 /* Attempt to expand a unary operator. Make the expansion closer to the
18242 actual machine than just general_operand, which would allow 2 separate
18243 memory references (one output, one input) in a single insn. */
18245 void
18246 ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18247 rtx operands[])
18249 bool matching_memory = false;
18250 rtx src, dst, op, clob;
18252 dst = operands[0];
18253 src = operands[1];
18255 /* If the destination is memory, and we do not have matching source
18256 operands, do things in registers. */
18257 if (MEM_P (dst))
18259 if (rtx_equal_p (dst, src))
18260 matching_memory = true;
18261 else
18262 dst = gen_reg_rtx (mode);
18265 /* When source operand is memory, destination must match. */
18266 if (MEM_P (src) && !matching_memory)
18267 src = force_reg (mode, src);
18269 /* Emit the instruction. */
18271 op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src));
18273 if (code == NOT)
18274 emit_insn (op);
18275 else
18277 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18278 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18281 /* Fix up the destination if needed. */
18282 if (dst != operands[0])
18283 emit_move_insn (operands[0], dst);
18286 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
18287 divisor are within the range [0-255]. */
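 /* In outline, the expansion below is:
        scratch = dividend | divisor;
        if ((scratch & ~0xff) == 0) goto qimode_label;    (test $-0x100)
        <ordinary signed/unsigned divmod>;  goto end_label;
      qimode_label:
        <8-bit unsigned divide; quotient ends up in AL, remainder in AH>
      end_label:  */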
18289 void
18290 ix86_split_idivmod (machine_mode mode, rtx operands[],
18291 bool signed_p)
18293 rtx_code_label *end_label, *qimode_label;
18294 rtx insn, div, mod;
18295 rtx scratch, tmp0, tmp1, tmp2;
18296 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18297 rtx (*gen_zero_extend) (rtx, rtx);
18298 rtx (*gen_test_ccno_1) (rtx, rtx);
18300 switch (mode)
18302 case SImode:
18303 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18304 gen_test_ccno_1 = gen_testsi_ccno_1;
18305 gen_zero_extend = gen_zero_extendqisi2;
18306 break;
18307 case DImode:
18308 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18309 gen_test_ccno_1 = gen_testdi_ccno_1;
18310 gen_zero_extend = gen_zero_extendqidi2;
18311 break;
18312 default:
18313 gcc_unreachable ();
18316 end_label = gen_label_rtx ();
18317 qimode_label = gen_label_rtx ();
18319 scratch = gen_reg_rtx (mode);
18321 /* Use 8bit unsigned divmod if dividend and divisor are within
18322 the range [0-255]. */
18323 emit_move_insn (scratch, operands[2]);
18324 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18325 scratch, 1, OPTAB_DIRECT);
18326 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18327 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18328 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18329 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18330 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18331 pc_rtx);
18332 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0));
18333 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18334 JUMP_LABEL (insn) = qimode_label;
18336 /* Generate the original signed/unsigned divmod. */
18337 div = gen_divmod4_1 (operands[0], operands[1],
18338 operands[2], operands[3]);
18339 emit_insn (div);
18341 /* Branch to the end. */
18342 emit_jump_insn (gen_jump (end_label));
18343 emit_barrier ();
18345 /* Generate 8bit unsigned divide. */
18346 emit_label (qimode_label);
18347 /* Don't use operands[0] for result of 8bit divide since not all
18348 registers support QImode ZERO_EXTRACT. */
18349 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18350 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18351 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18352 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18354 if (signed_p)
18356 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18357 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18359 else
18361 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18362 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18365 /* Extract remainder from AH. */
18366 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18367 if (REG_P (operands[1]))
18368 insn = emit_move_insn (operands[1], tmp1);
18369 else
18371 /* Need a new scratch register since the old one has result
18372 of 8bit divide. */
18373 scratch = gen_reg_rtx (mode);
18374 emit_move_insn (scratch, tmp1);
18375 insn = emit_move_insn (operands[1], scratch);
18377 set_unique_reg_note (insn, REG_EQUAL, mod);
18379 /* Zero extend quotient from AL. */
18380 tmp1 = gen_lowpart (QImode, tmp0);
18381 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18382 set_unique_reg_note (insn, REG_EQUAL, div);
18384 emit_label (end_label);
18387 #define LEA_MAX_STALL (3)
18388 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18390 /* Increase the given DISTANCE in half-cycles according to
18391 dependencies between the PREV and NEXT instructions.
18392 Add 1 half-cycle if there is no dependency and round up
18393 to the next full cycle if there is one. */
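 /* E.g. with DISTANCE == 3 and a true dependency between PREV and NEXT the
    result is 3 + (3 & 1) + 2 == 6 half-cycles, pushing the dependent insn
    to the start of the next full cycle; with no dependency it is simply 4.  */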
18395 static unsigned int
18396 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18398 df_ref def, use;
18400 if (!prev || !next)
18401 return distance + (distance & 1) + 2;
18403 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18404 return distance + 1;
18406 FOR_EACH_INSN_USE (use, next)
18407 FOR_EACH_INSN_DEF (def, prev)
18408 if (!DF_REF_IS_ARTIFICIAL (def)
18409 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18410 return distance + (distance & 1) + 2;
18412 return distance + 1;
18415 /* Return true if instruction INSN defines register number
18416 REGNO1 or REGNO2. */
18418 static bool
18419 insn_defines_reg (unsigned int regno1, unsigned int regno2,
18420 rtx_insn *insn)
18422 df_ref def;
18424 FOR_EACH_INSN_DEF (def, insn)
18425 if (DF_REF_REG_DEF_P (def)
18426 && !DF_REF_IS_ARTIFICIAL (def)
18427 && (regno1 == DF_REF_REGNO (def)
18428 || regno2 == DF_REF_REGNO (def)))
18429 return true;
18431 return false;
18434 /* Return true if instruction INSN uses register number
18435 REGNO as part of an address expression. */
18437 static bool
18438 insn_uses_reg_mem (unsigned int regno, rtx insn)
18440 df_ref use;
18442 FOR_EACH_INSN_USE (use, insn)
18443 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18444 return true;
18446 return false;
18449 /* Search backward for a non-agu definition of register number REGNO1
18450 or register number REGNO2 in the basic block, starting from instruction
18451 START and stopping at the head of the basic block or at instruction INSN.
18453 Set *FOUND to true if a definition was found and to false otherwise.
18456 The distance in half-cycles between START and the found instruction or
18457 the head of the BB is added to DISTANCE and returned. */
18459 static int
18460 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18461 rtx_insn *insn, int distance,
18462 rtx_insn *start, bool *found)
18464 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18465 rtx_insn *prev = start;
18466 rtx_insn *next = NULL;
18468 *found = false;
18470 while (prev
18471 && prev != insn
18472 && distance < LEA_SEARCH_THRESHOLD)
18474 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18476 distance = increase_distance (prev, next, distance);
18477 if (insn_defines_reg (regno1, regno2, prev))
18479 if (recog_memoized (prev) < 0
18480 || get_attr_type (prev) != TYPE_LEA)
18482 *found = true;
18483 return distance;
18487 next = prev;
18489 if (prev == BB_HEAD (bb))
18490 break;
18492 prev = PREV_INSN (prev);
18495 return distance;
18498 /* Search backward for a non-agu definition of register number REGNO1
18499 or register number REGNO2 in INSN's basic block until we
18500 1. Pass LEA_SEARCH_THRESHOLD instructions, or
18501 2. Reach a neighbouring BB boundary, or
18502 3. Reach an agu definition.
18503 Return the distance between the non-agu definition point and INSN.
18504 If there is no definition point, return -1. */
18506 static int
18507 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18508 rtx_insn *insn)
18510 basic_block bb = BLOCK_FOR_INSN (insn);
18511 int distance = 0;
18512 bool found = false;
18514 if (insn != BB_HEAD (bb))
18515 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18516 distance, PREV_INSN (insn),
18517 &found);
18519 if (!found && distance < LEA_SEARCH_THRESHOLD)
18521 edge e;
18522 edge_iterator ei;
18523 bool simple_loop = false;
18525 FOR_EACH_EDGE (e, ei, bb->preds)
18526 if (e->src == bb)
18528 simple_loop = true;
18529 break;
18532 if (simple_loop)
18533 distance = distance_non_agu_define_in_bb (regno1, regno2,
18534 insn, distance,
18535 BB_END (bb), &found);
18536 else
18538 int shortest_dist = -1;
18539 bool found_in_bb = false;
18541 FOR_EACH_EDGE (e, ei, bb->preds)
18543 int bb_dist
18544 = distance_non_agu_define_in_bb (regno1, regno2,
18545 insn, distance,
18546 BB_END (e->src),
18547 &found_in_bb);
18548 if (found_in_bb)
18550 if (shortest_dist < 0)
18551 shortest_dist = bb_dist;
18552 else if (bb_dist > 0)
18553 shortest_dist = MIN (bb_dist, shortest_dist);
18555 found = true;
18559 distance = shortest_dist;
18563 /* get_attr_type may modify recog data. We want to make sure
18564 that recog data is valid for instruction INSN, on which
18565 distance_non_agu_define is called. INSN is unchanged here. */
18566 extract_insn_cached (insn);
18568 if (!found)
18569 return -1;
18571 return distance >> 1;
18574 /* Return the distance in half-cycles between INSN and the next
18575 insn that uses register number REGNO in a memory address, added
18576 to DISTANCE. Return -1 if REGNO is redefined first.
18578 Put a true value into *FOUND if a register usage was found and
18579 false otherwise.
18580 Put a true value into *REDEFINED if a register redefinition was
18581 found and false otherwise. */
18583 static int
18584 distance_agu_use_in_bb (unsigned int regno,
18585 rtx_insn *insn, int distance, rtx_insn *start,
18586 bool *found, bool *redefined)
18588 basic_block bb = NULL;
18589 rtx_insn *next = start;
18590 rtx_insn *prev = NULL;
18592 *found = false;
18593 *redefined = false;
18595 if (start != NULL_RTX)
18597 bb = BLOCK_FOR_INSN (start);
18598 if (start != BB_HEAD (bb))
18599 /* If insn and start belong to the same bb, set prev to insn,
18600 so the call to increase_distance will increase the distance
18601 between insns by 1. */
18602 prev = insn;
18605 while (next
18606 && next != insn
18607 && distance < LEA_SEARCH_THRESHOLD)
18609 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18611 distance = increase_distance (prev, next, distance);
18612 if (insn_uses_reg_mem (regno, next))
18614 /* Return DISTANCE if OP0 is used in memory
18615 address in NEXT. */
18616 *found = true;
18617 return distance;
18620 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18622 /* Return -1 if OP0 is set in NEXT. */
18623 *redefined = true;
18624 return -1;
18627 prev = next;
18630 if (next == BB_END (bb))
18631 break;
18633 next = NEXT_INSN (next);
18636 return distance;
18639 /* Return the distance between INSN and the next insn that uses
18640 register number REGNO0 in a memory address. Return -1 if no such
18641 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
18643 static int
18644 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18646 basic_block bb = BLOCK_FOR_INSN (insn);
18647 int distance = 0;
18648 bool found = false;
18649 bool redefined = false;
18651 if (insn != BB_END (bb))
18652 distance = distance_agu_use_in_bb (regno0, insn, distance,
18653 NEXT_INSN (insn),
18654 &found, &redefined);
18656 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18658 edge e;
18659 edge_iterator ei;
18660 bool simple_loop = false;
18662 FOR_EACH_EDGE (e, ei, bb->succs)
18663 if (e->dest == bb)
18665 simple_loop = true;
18666 break;
18669 if (simple_loop)
18670 distance = distance_agu_use_in_bb (regno0, insn,
18671 distance, BB_HEAD (bb),
18672 &found, &redefined);
18673 else
18675 int shortest_dist = -1;
18676 bool found_in_bb = false;
18677 bool redefined_in_bb = false;
18679 FOR_EACH_EDGE (e, ei, bb->succs)
18681 int bb_dist
18682 = distance_agu_use_in_bb (regno0, insn,
18683 distance, BB_HEAD (e->dest),
18684 &found_in_bb, &redefined_in_bb);
18685 if (found_in_bb)
18687 if (shortest_dist < 0)
18688 shortest_dist = bb_dist;
18689 else if (bb_dist > 0)
18690 shortest_dist = MIN (bb_dist, shortest_dist);
18692 found = true;
18696 distance = shortest_dist;
18700 if (!found || redefined)
18701 return -1;
18703 return distance >> 1;
18706 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18707 there is a dilemma of choosing LEA or ADD.
18708 Negative value: ADD is preferred over LEA.
18709 Zero: Neutral.
18710 Positive value: LEA is preferred over ADD.  */
18711 #define IX86_LEA_PRIORITY 0
18713 /* Return true if using the lea INSN has a performance advantage
18714 over a sequence of instructions. The instruction sequence has
18715 SPLIT_COST cycles higher latency than the lea latency. */
18717 static bool
18718 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18719 unsigned int regno2, int split_cost, bool has_scale)
18721 int dist_define, dist_use;
18723 /* For Silvermont, if a 2-source or 3-source LEA is used for
18724 non-destructive destination purposes, or because the
18725 ability to use SCALE is wanted, the use of LEA is justified. */
18726 if (TARGET_SILVERMONT || TARGET_INTEL)
18728 if (has_scale)
18729 return true;
18730 if (split_cost < 1)
18731 return false;
18732 if (regno0 == regno1 || regno0 == regno2)
18733 return false;
18734 return true;
18737 dist_define = distance_non_agu_define (regno1, regno2, insn);
18738 dist_use = distance_agu_use (regno0, insn);
18740 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18742 /* If there is no non-AGU operand definition, no AGU
18743 operand usage and the split cost is 0, then both the lea
18744 and non-lea variants have the same priority. Currently
18745 we prefer lea for 64-bit code and non-lea for 32-bit
18746 code. */
18747 if (dist_use < 0 && split_cost == 0)
18748 return TARGET_64BIT || IX86_LEA_PRIORITY;
18749 else
18750 return true;
18753 /* With a longer definition distance, lea is more preferable.
18754 Here we change it to take into account the splitting cost and
18755 lea priority. */
18756 dist_define += split_cost + IX86_LEA_PRIORITY;
18758 /* If there is no use in a memory address then we just check
18759 that the split cost exceeds the AGU stall. */
18760 if (dist_use < 0)
18761 return dist_define > LEA_MAX_STALL;
18763 /* If this insn has both a backward non-AGU dependence and a forward
18764 AGU dependence, the one with the shorter distance takes effect. */
18765 return dist_define >= dist_use;
18768 /* Return true if it is legal to clobber flags by INSN and
18769 false otherwise. */
18771 static bool
18772 ix86_ok_to_clobber_flags (rtx_insn *insn)
18774 basic_block bb = BLOCK_FOR_INSN (insn);
18775 df_ref use;
18776 bitmap live;
18778 while (insn)
18780 if (NONDEBUG_INSN_P (insn))
18782 FOR_EACH_INSN_USE (use, insn)
18783 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18784 return false;
18786 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18787 return true;
18790 if (insn == BB_END (bb))
18791 break;
18793 insn = NEXT_INSN (insn);
18796 live = df_get_live_out (bb);
18797 return !REGNO_REG_SET_P (live, FLAGS_REG);
18800 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18801 move and add to avoid AGU stalls. */
18803 bool
18804 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18806 unsigned int regno0, regno1, regno2;
18808 /* Check if we need to optimize. */
18809 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18810 return false;
18812 /* Check it is correct to split here. */
18813 if (!ix86_ok_to_clobber_flags (insn))
18814 return false;
18816 regno0 = true_regnum (operands[0]);
18817 regno1 = true_regnum (operands[1]);
18818 regno2 = true_regnum (operands[2]);
18820 /* We need to split only adds with a non-destructive
18821 destination operand. */
18822 if (regno0 == regno1 || regno0 == regno2)
18823 return false;
18824 else
18825 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
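/* Editorial illustration (not compiler output): for a non-destructive add
   such as "lea (%rbx,%rcx), %rax", the split performed by the corresponding
   splitter is the ALU-only pair "mov %rbx, %rax; add %rcx, %rax", trading one
   extra instruction (the SPLIT_COST of 1 passed above) for avoiding the AGU.  */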
18828 /* Return true if we should emit lea instruction instead of mov
18829 instruction. */
18831 bool
18832 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18834 unsigned int regno0, regno1;
18836 /* Check if we need to optimize. */
18837 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18838 return false;
18840 /* Use lea for reg to reg moves only. */
18841 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18842 return false;
18844 regno0 = true_regnum (operands[0]);
18845 regno1 = true_regnum (operands[1]);
18847 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18850 /* Return true if we need to split lea into a sequence of
18851 instructions to avoid AGU stalls. */
18853 bool
18854 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18856 unsigned int regno0, regno1, regno2;
18857 int split_cost;
18858 struct ix86_address parts;
18859 int ok;
18861 /* Check we need to optimize. */
18862 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18863 return false;
18865 /* The "at least two components" test below might not catch simple
18866 move or zero extension insns if parts.base is non-NULL and parts.disp
18867 is const0_rtx as the only components in the address, e.g. if the
18868 register is %rbp or %r13. As this test is much cheaper and moves or
18869 zero extensions are the common case, do this check first. */
18870 if (REG_P (operands[1])
18871 || (SImode_address_operand (operands[1], VOIDmode)
18872 && REG_P (XEXP (operands[1], 0))))
18873 return false;
18875 /* Check if it is OK to split here. */
18876 if (!ix86_ok_to_clobber_flags (insn))
18877 return false;
18879 ok = ix86_decompose_address (operands[1], &parts);
18880 gcc_assert (ok);
18882 /* There should be at least two components in the address. */
18883 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18884 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18885 return false;
18887 /* We should not split into add if a non-legitimate PIC
18888 operand is used as the displacement. */
18889 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18890 return false;
18892 regno0 = true_regnum (operands[0]);
18893 regno1 = INVALID_REGNUM;
18894 regno2 = INVALID_REGNUM;
18896 if (parts.base)
18897 regno1 = true_regnum (parts.base);
18898 if (parts.index)
18899 regno2 = true_regnum (parts.index);
18901 split_cost = 0;
18903 /* Compute how many cycles we will add to the execution time
18904 if we split the lea into a sequence of instructions. */
18905 if (parts.base || parts.index)
18907 /* Have to use a mov instruction if the non-destructive
18908 destination form is used. */
18909 if (regno1 != regno0 && regno2 != regno0)
18910 split_cost += 1;
18912 /* Have to add index to base if both exist. */
18913 if (parts.base && parts.index)
18914 split_cost += 1;
18916 /* Have to use shift and adds if scale is 2 or greater. */
18917 if (parts.scale > 1)
18919 if (regno0 != regno1)
18920 split_cost += 1;
18921 else if (regno2 == regno0)
18922 split_cost += 4;
18923 else
18924 split_cost += parts.scale;
18927 /* Have to use add instruction with immediate if
18928 disp is non zero. */
18929 if (parts.disp && parts.disp != const0_rtx)
18930 split_cost += 1;
18932 /* Subtract the price of lea. */
18933 split_cost -= 1;
18936 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18937 parts.scale > 1);
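/* Editorial worked example of the cost model above: for
   "lea 0x8(%rbx,%rcx,4), %rax" with a destination distinct from base and
   index, split_cost = 1 (mov into the destination) + 1 (add base and index)
   + 1 (shift for the scale, since regno0 != regno1) + 1 (add the
   displacement) - 1 (the lea we no longer emit) = 3, which is then weighed
   against the AGU stall distances in ix86_lea_outperforms.  */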
18940 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18941 matches destination. RTX includes clobber of FLAGS_REG. */
18943 static void
18944 ix86_emit_binop (enum rtx_code code, machine_mode mode,
18945 rtx dst, rtx src)
18947 rtx op, clob;
18949 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
18950 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18952 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18955 /* Return true if regno1 def is nearest to the insn. */
18957 static bool
18958 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18960 rtx_insn *prev = insn;
18961 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18963 if (insn == start)
18964 return false;
18965 while (prev && prev != start)
18967 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18969 prev = PREV_INSN (prev);
18970 continue;
18972 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18973 return true;
18974 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18975 return false;
18976 prev = PREV_INSN (prev);
18979 /* None of the regs is defined in the bb. */
18980 return false;
18983 /* Split lea instructions into a sequence of instructions
18984 which are executed on the ALU to avoid AGU stalls.
18985 It is assumed that it is allowed to clobber the flags register
18986 at the lea position. */
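/* Editorial illustration (a sketch, not actual compiler output) of the kind
   of sequence this produces for a scaled address with a distinct destination,
   e.g. operands[1] == base %rbx + index %rcx scaled by 4 + disp 0x8:

       lea 0x8(%rbx,%rcx,4), %rax

   becomes, following the scale > 1 path below,

       mov %rcx, %rax      ; move the index into the destination
       shl $2, %rax        ; scale by 4 via a shift
       add %rbx, %rax      ; add the base
       add $0x8, %rax      ; add the displacement  */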
18988 void
18989 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
18991 unsigned int regno0, regno1, regno2;
18992 struct ix86_address parts;
18993 rtx target, tmp;
18994 int ok, adds;
18996 ok = ix86_decompose_address (operands[1], &parts);
18997 gcc_assert (ok);
18999 target = gen_lowpart (mode, operands[0]);
19001 regno0 = true_regnum (target);
19002 regno1 = INVALID_REGNUM;
19003 regno2 = INVALID_REGNUM;
19005 if (parts.base)
19007 parts.base = gen_lowpart (mode, parts.base);
19008 regno1 = true_regnum (parts.base);
19011 if (parts.index)
19013 parts.index = gen_lowpart (mode, parts.index);
19014 regno2 = true_regnum (parts.index);
19017 if (parts.disp)
19018 parts.disp = gen_lowpart (mode, parts.disp);
19020 if (parts.scale > 1)
19022 /* Case r1 = r1 + ... */
19023 if (regno1 == regno0)
19025 /* If we have a case r1 = r1 + C * r2 then we
19026 would have to use multiplication, which is very
19027 expensive. Assume the cost model is wrong if we
19028 have such a case here. */
19029 gcc_assert (regno2 != regno0);
19031 for (adds = parts.scale; adds > 0; adds--)
19032 ix86_emit_binop (PLUS, mode, target, parts.index);
19034 else
19036 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
19037 if (regno0 != regno2)
19038 emit_insn (gen_rtx_SET (target, parts.index));
19040 /* Use shift for scaling. */
19041 ix86_emit_binop (ASHIFT, mode, target,
19042 GEN_INT (exact_log2 (parts.scale)));
19044 if (parts.base)
19045 ix86_emit_binop (PLUS, mode, target, parts.base);
19047 if (parts.disp && parts.disp != const0_rtx)
19048 ix86_emit_binop (PLUS, mode, target, parts.disp);
19051 else if (!parts.base && !parts.index)
19053 gcc_assert (parts.disp);
19054 emit_insn (gen_rtx_SET (target, parts.disp));
19056 else
19058 if (!parts.base)
19060 if (regno0 != regno2)
19061 emit_insn (gen_rtx_SET (target, parts.index));
19063 else if (!parts.index)
19065 if (regno0 != regno1)
19066 emit_insn (gen_rtx_SET (target, parts.base));
19068 else
19070 if (regno0 == regno1)
19071 tmp = parts.index;
19072 else if (regno0 == regno2)
19073 tmp = parts.base;
19074 else
19076 rtx tmp1;
19078 /* Find better operand for SET instruction, depending
19079 on which definition is farther from the insn. */
19080 if (find_nearest_reg_def (insn, regno1, regno2))
19081 tmp = parts.index, tmp1 = parts.base;
19082 else
19083 tmp = parts.base, tmp1 = parts.index;
19085 emit_insn (gen_rtx_SET (target, tmp));
19087 if (parts.disp && parts.disp != const0_rtx)
19088 ix86_emit_binop (PLUS, mode, target, parts.disp);
19090 ix86_emit_binop (PLUS, mode, target, tmp1);
19091 return;
19094 ix86_emit_binop (PLUS, mode, target, tmp);
19097 if (parts.disp && parts.disp != const0_rtx)
19098 ix86_emit_binop (PLUS, mode, target, parts.disp);
19102 /* Return true if it is ok to optimize an ADD operation to a LEA
19103 operation to avoid flag register consumption. For most processors,
19104 ADD is faster than LEA. For processors like BONNELL, if the
19105 destination register of the LEA holds an actual address which will be
19106 used soon, LEA is better; otherwise ADD is better. */
19108 bool
19109 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19111 unsigned int regno0 = true_regnum (operands[0]);
19112 unsigned int regno1 = true_regnum (operands[1]);
19113 unsigned int regno2 = true_regnum (operands[2]);
19115 /* If a = b + c with a != b and a != c, we must use the lea form. */
19116 if (regno0 != regno1 && regno0 != regno2)
19117 return true;
19119 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19120 return false;
19122 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19125 /* Return true if destination reg of SET_BODY is shift count of
19126 USE_BODY. */
19128 static bool
19129 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19131 rtx set_dest;
19132 rtx shift_rtx;
19133 int i;
19135 /* Retrieve destination of SET_BODY. */
19136 switch (GET_CODE (set_body))
19138 case SET:
19139 set_dest = SET_DEST (set_body);
19140 if (!set_dest || !REG_P (set_dest))
19141 return false;
19142 break;
19143 case PARALLEL:
19144 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19145 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19146 use_body))
19147 return true;
19148 default:
19149 return false;
19150 break;
19153 /* Retrieve shift count of USE_BODY. */
19154 switch (GET_CODE (use_body))
19156 case SET:
19157 shift_rtx = XEXP (use_body, 1);
19158 break;
19159 case PARALLEL:
19160 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19161 if (ix86_dep_by_shift_count_body (set_body,
19162 XVECEXP (use_body, 0, i)))
19163 return true;
19164 default:
19165 return false;
19166 break;
19169 if (shift_rtx
19170 && (GET_CODE (shift_rtx) == ASHIFT
19171 || GET_CODE (shift_rtx) == LSHIFTRT
19172 || GET_CODE (shift_rtx) == ASHIFTRT
19173 || GET_CODE (shift_rtx) == ROTATE
19174 || GET_CODE (shift_rtx) == ROTATERT))
19176 rtx shift_count = XEXP (shift_rtx, 1);
19178 /* Return true if shift count is dest of SET_BODY. */
19179 if (REG_P (shift_count))
19181 /* Add check since it can be invoked before register
19182 allocation in pre-reload schedule. */
19183 if (reload_completed
19184 && true_regnum (set_dest) == true_regnum (shift_count))
19185 return true;
19186 else if (REGNO (set_dest) == REGNO (shift_count))
19187 return true;
19191 return false;
19194 /* Return true if destination reg of SET_INSN is shift count of
19195 USE_INSN. */
19197 bool
19198 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19200 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19201 PATTERN (use_insn));
19204 /* Return TRUE or FALSE depending on whether the unary operator meets the
19205 appropriate constraints. */
19207 bool
19208 ix86_unary_operator_ok (enum rtx_code,
19209 machine_mode,
19210 rtx operands[2])
19212 /* If one of operands is memory, source and destination must match. */
19213 if ((MEM_P (operands[0])
19214 || MEM_P (operands[1]))
19215 && ! rtx_equal_p (operands[0], operands[1]))
19216 return false;
19217 return true;
19220 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19221 are ok, keeping in mind the possible movddup alternative. */
19223 bool
19224 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19226 if (MEM_P (operands[0]))
19227 return rtx_equal_p (operands[0], operands[1 + high]);
19228 if (MEM_P (operands[1]) && MEM_P (operands[2]))
19229 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19230 return true;
19233 /* Post-reload splitter for converting an SF or DFmode value in an
19234 SSE register into an unsigned SImode value. */
19236 void
19237 ix86_split_convert_uns_si_sse (rtx operands[])
19239 machine_mode vecmode;
19240 rtx value, large, zero_or_two31, input, two31, x;
19242 large = operands[1];
19243 zero_or_two31 = operands[2];
19244 input = operands[3];
19245 two31 = operands[4];
19246 vecmode = GET_MODE (large);
19247 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19249 /* Load up the value into the low element. We must ensure that the other
19250 elements are valid floats -- zero is the easiest such value. */
19251 if (MEM_P (input))
19253 if (vecmode == V4SFmode)
19254 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19255 else
19256 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19258 else
19260 input = gen_rtx_REG (vecmode, REGNO (input));
19261 emit_move_insn (value, CONST0_RTX (vecmode));
19262 if (vecmode == V4SFmode)
19263 emit_insn (gen_sse_movss (value, value, input));
19264 else
19265 emit_insn (gen_sse2_movsd (value, value, input));
19268 emit_move_insn (large, two31);
19269 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19271 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19272 emit_insn (gen_rtx_SET (large, x));
19274 x = gen_rtx_AND (vecmode, zero_or_two31, large);
19275 emit_insn (gen_rtx_SET (zero_or_two31, x));
19277 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19278 emit_insn (gen_rtx_SET (value, x));
19280 large = gen_rtx_REG (V4SImode, REGNO (large));
19281 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19283 x = gen_rtx_REG (V4SImode, REGNO (value));
19284 if (vecmode == V4SFmode)
19285 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19286 else
19287 emit_insn (gen_sse2_cvttpd2dq (x, value));
19288 value = x;
19290 emit_insn (gen_xorv4si3 (value, value, large));
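/* Editorial sketch of what the sequence above computes, written as scalar
   plain C.  The helper name and the reliance on <stdint.h> (normally visible
   here via system.h) are illustrative assumptions, not part of the expander.  */

static uint32_t
ix86_doc_sf_to_uns32_sketch (float x)
{
  float two31 = 0x1.0p31f;
  int large = (two31 <= x);                     /* the LE mask built above */
  float adj = large ? two31 : 0.0f;             /* zero_or_two31 */
  uint32_t r = (uint32_t) (int32_t) (x - adj);  /* signed truncation */
  return r ^ (large ? 0x80000000u : 0u);        /* xor the sign bit back in */
}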
19293 /* Convert an unsigned DImode value into a DFmode, using only SSE.
19294 Expects the 64-bit DImode to be supplied in a pair of integral
19295 registers. Requires SSE2; will use SSE3 if available. For x86_32,
19296 -mfpmath=sse, !optimize_size only. */
19298 void
19299 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19301 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19302 rtx int_xmm, fp_xmm;
19303 rtx biases, exponents;
19304 rtx x;
19306 int_xmm = gen_reg_rtx (V4SImode);
19307 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19308 emit_insn (gen_movdi_to_sse (int_xmm, input));
19309 else if (TARGET_SSE_SPLIT_REGS)
19311 emit_clobber (int_xmm);
19312 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19314 else
19316 x = gen_reg_rtx (V2DImode);
19317 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19318 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19321 x = gen_rtx_CONST_VECTOR (V4SImode,
19322 gen_rtvec (4, GEN_INT (0x43300000UL),
19323 GEN_INT (0x45300000UL),
19324 const0_rtx, const0_rtx));
19325 exponents = validize_mem (force_const_mem (V4SImode, x));
19327 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19328 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19330 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19331 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19332 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19333 (0x1.0p84 + double(fp_value_hi_xmm)).
19334 Note these exponents differ by 32. */
19336 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19338 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19339 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
19340 real_ldexp (&bias_lo_rvt, &dconst1, 52);
19341 real_ldexp (&bias_hi_rvt, &dconst1, 84);
19342 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19343 x = const_double_from_real_value (bias_hi_rvt, DFmode);
19344 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19345 biases = validize_mem (force_const_mem (V2DFmode, biases));
19346 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19348 /* Add the upper and lower DFmode values together. */
19349 if (TARGET_SSE3)
19350 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19351 else
19353 x = copy_to_mode_reg (V2DFmode, fp_xmm);
19354 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19355 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19358 ix86_expand_vector_extract (false, target, fp_xmm, 0);
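/* Editorial sketch of the bias trick implemented above, written as scalar
   plain C.  The helper name is illustrative; <stdint.h> and <string.h> are
   assumed to be visible (system.h normally provides them).  Building
   2^52 + lo and 2^84 + hi*2^32 by bit concatenation, subtracting the biases
   and adding the halves reconstructs the unsigned 64-bit value with a single
   rounding.  */

static double
ix86_doc_uns64_to_double_sketch (uint64_t v)
{
  uint64_t lo_bits = ((uint64_t) 0x43300000 << 32) | (uint32_t) v;          /* 2^52 + lo */
  uint64_t hi_bits = ((uint64_t) 0x45300000 << 32) | (uint32_t) (v >> 32);  /* 2^84 + hi * 2^32 */
  double lo, hi;
  memcpy (&lo, &lo_bits, sizeof lo);
  memcpy (&hi, &hi_bits, sizeof hi);
  return (hi - 0x1.0p84) + (lo - 0x1.0p52);
}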
19361 /* Not used, but eases macroization of patterns. */
19362 void
19363 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19365 gcc_unreachable ();
19368 /* Convert an unsigned SImode value into a DFmode. Only currently used
19369 for SSE, but applicable anywhere. */
19371 void
19372 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19374 REAL_VALUE_TYPE TWO31r;
19375 rtx x, fp;
19377 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19378 NULL, 1, OPTAB_DIRECT);
19380 fp = gen_reg_rtx (DFmode);
19381 emit_insn (gen_floatsidf2 (fp, x));
19383 real_ldexp (&TWO31r, &dconst1, 31);
19384 x = const_double_from_real_value (TWO31r, DFmode);
19386 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19387 if (x != target)
19388 emit_move_insn (target, x);
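/* Editorial scalar sketch of the expansion above (helper name illustrative):
   bias the unsigned value into the signed range with a wrapping add of -2^31,
   convert, then undo the bias in floating point.  The result is exact since
   any 32-bit value fits in a double.  */

static double
ix86_doc_uns32_to_double_sketch (uint32_t v)
{
  int32_t biased = (int32_t) (v - 0x80000000u);  /* mirrors the wrapping PLUS above */
  return (double) biased + 0x1.0p31;
}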
19391 /* Convert a signed DImode value into a DFmode. Only used for SSE in
19392 32-bit mode; otherwise we have a direct convert instruction. */
19394 void
19395 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19397 REAL_VALUE_TYPE TWO32r;
19398 rtx fp_lo, fp_hi, x;
19400 fp_lo = gen_reg_rtx (DFmode);
19401 fp_hi = gen_reg_rtx (DFmode);
19403 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19405 real_ldexp (&TWO32r, &dconst1, 32);
19406 x = const_double_from_real_value (TWO32r, DFmode);
19407 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19409 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19411 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19412 0, OPTAB_DIRECT);
19413 if (x != target)
19414 emit_move_insn (target, x);
19417 /* Convert an unsigned SImode value into a SFmode, using only SSE.
19418 For x86_32, -mfpmath=sse, !optimize_size only. */
19419 void
19420 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19422 REAL_VALUE_TYPE ONE16r;
19423 rtx fp_hi, fp_lo, int_hi, int_lo, x;
19425 real_ldexp (&ONE16r, &dconst1, 16);
19426 x = const_double_from_real_value (ONE16r, SFmode);
19427 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
19428 NULL, 0, OPTAB_DIRECT);
19429 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
19430 NULL, 0, OPTAB_DIRECT);
19431 fp_hi = gen_reg_rtx (SFmode);
19432 fp_lo = gen_reg_rtx (SFmode);
19433 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19434 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19435 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19436 0, OPTAB_DIRECT);
19437 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19438 0, OPTAB_DIRECT);
19439 if (!rtx_equal_p (target, fp_hi))
19440 emit_move_insn (target, fp_hi);
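/* Editorial scalar sketch of the 16-bit split used above (helper name
   illustrative): both halves convert exactly to SFmode, the scaled high half
   is exact, and only the final addition rounds.  */

static float
ix86_doc_uns32_to_float_sketch (uint32_t v)
{
  float fp_lo = (float) (int32_t) (v & 0xffff);
  float fp_hi = (float) (int32_t) (v >> 16);
  return fp_hi * 0x1.0p16f + fp_lo;
}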
19443 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
19444 a vector of unsigned ints VAL to vector of floats TARGET. */
19446 void
19447 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19449 rtx tmp[8];
19450 REAL_VALUE_TYPE TWO16r;
19451 machine_mode intmode = GET_MODE (val);
19452 machine_mode fltmode = GET_MODE (target);
19453 rtx (*cvt) (rtx, rtx);
19455 if (intmode == V4SImode)
19456 cvt = gen_floatv4siv4sf2;
19457 else
19458 cvt = gen_floatv8siv8sf2;
19459 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19460 tmp[0] = force_reg (intmode, tmp[0]);
19461 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19462 OPTAB_DIRECT);
19463 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19464 NULL_RTX, 1, OPTAB_DIRECT);
19465 tmp[3] = gen_reg_rtx (fltmode);
19466 emit_insn (cvt (tmp[3], tmp[1]));
19467 tmp[4] = gen_reg_rtx (fltmode);
19468 emit_insn (cvt (tmp[4], tmp[2]));
19469 real_ldexp (&TWO16r, &dconst1, 16);
19470 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19471 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19472 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19473 OPTAB_DIRECT);
19474 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19475 OPTAB_DIRECT);
19476 if (tmp[7] != target)
19477 emit_move_insn (target, tmp[7]);
19480 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19481 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19482 This is done by doing just signed conversion if < 0x1p31, and otherwise by
19483 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
19486 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19488 REAL_VALUE_TYPE TWO31r;
19489 rtx two31r, tmp[4];
19490 machine_mode mode = GET_MODE (val);
19491 machine_mode scalarmode = GET_MODE_INNER (mode);
19492 machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19493 rtx (*cmp) (rtx, rtx, rtx, rtx);
19494 int i;
19496 for (i = 0; i < 3; i++)
19497 tmp[i] = gen_reg_rtx (mode);
19498 real_ldexp (&TWO31r, &dconst1, 31);
19499 two31r = const_double_from_real_value (TWO31r, scalarmode);
19500 two31r = ix86_build_const_vector (mode, 1, two31r);
19501 two31r = force_reg (mode, two31r);
19502 switch (mode)
19504 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19505 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19506 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19507 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19508 default: gcc_unreachable ();
19510 tmp[3] = gen_rtx_LE (mode, two31r, val);
19511 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19512 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19513 0, OPTAB_DIRECT);
19514 if (intmode == V4SImode || TARGET_AVX2)
19515 *xorp = expand_simple_binop (intmode, ASHIFT,
19516 gen_lowpart (intmode, tmp[0]),
19517 GEN_INT (31), NULL_RTX, 0,
19518 OPTAB_DIRECT);
19519 else
19521 rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31);
19522 two31 = ix86_build_const_vector (intmode, 1, two31);
19523 *xorp = expand_simple_binop (intmode, AND,
19524 gen_lowpart (intmode, tmp[0]),
19525 two31, NULL_RTX, 0,
19526 OPTAB_DIRECT);
19528 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19529 0, OPTAB_DIRECT);
19532 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
19533 then replicate the value for all elements of the vector
19534 register. */
19537 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19539 int i, n_elt;
19540 rtvec v;
19541 machine_mode scalar_mode;
19543 switch (mode)
19545 case V64QImode:
19546 case V32QImode:
19547 case V16QImode:
19548 case V32HImode:
19549 case V16HImode:
19550 case V8HImode:
19551 case V16SImode:
19552 case V8SImode:
19553 case V4SImode:
19554 case V8DImode:
19555 case V4DImode:
19556 case V2DImode:
19557 gcc_assert (vect);
19558 case V16SFmode:
19559 case V8SFmode:
19560 case V4SFmode:
19561 case V8DFmode:
19562 case V4DFmode:
19563 case V2DFmode:
19564 n_elt = GET_MODE_NUNITS (mode);
19565 v = rtvec_alloc (n_elt);
19566 scalar_mode = GET_MODE_INNER (mode);
19568 RTVEC_ELT (v, 0) = value;
19570 for (i = 1; i < n_elt; ++i)
19571 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19573 return gen_rtx_CONST_VECTOR (mode, v);
19575 default:
19576 gcc_unreachable ();
19580 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19581 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19582 for an SSE register. If VECT is true, then replicate the mask for
19583 all elements of the vector register. If INVERT is true, then create
19584 a mask excluding the sign bit. */
19587 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19589 machine_mode vec_mode, imode;
19590 wide_int w;
19591 rtx mask, v;
19593 switch (mode)
19595 case V16SImode:
19596 case V16SFmode:
19597 case V8SImode:
19598 case V4SImode:
19599 case V8SFmode:
19600 case V4SFmode:
19601 vec_mode = mode;
19602 mode = GET_MODE_INNER (mode);
19603 imode = SImode;
19604 break;
19606 case V8DImode:
19607 case V4DImode:
19608 case V2DImode:
19609 case V8DFmode:
19610 case V4DFmode:
19611 case V2DFmode:
19612 vec_mode = mode;
19613 mode = GET_MODE_INNER (mode);
19614 imode = DImode;
19615 break;
19617 case TImode:
19618 case TFmode:
19619 vec_mode = VOIDmode;
19620 imode = TImode;
19621 break;
19623 default:
19624 gcc_unreachable ();
19627 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (mode) - 1,
19628 GET_MODE_BITSIZE (mode));
19629 if (invert)
19630 w = wi::bit_not (w);
19632 /* Force this value into the low part of a fp vector constant. */
19633 mask = immed_wide_int_const (w, imode);
19634 mask = gen_lowpart (mode, mask);
19636 if (vec_mode == VOIDmode)
19637 return force_reg (mode, mask);
19639 v = ix86_build_const_vector (vec_mode, vect, mask);
19640 return force_reg (vec_mode, v);
19643 /* Generate code for floating point ABS or NEG. */
19645 void
19646 ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19647 rtx operands[])
19649 rtx mask, set, dst, src;
19650 bool use_sse = false;
19651 bool vector_mode = VECTOR_MODE_P (mode);
19652 machine_mode vmode = mode;
19654 if (vector_mode)
19655 use_sse = true;
19656 else if (mode == TFmode)
19657 use_sse = true;
19658 else if (TARGET_SSE_MATH)
19660 use_sse = SSE_FLOAT_MODE_P (mode);
19661 if (mode == SFmode)
19662 vmode = V4SFmode;
19663 else if (mode == DFmode)
19664 vmode = V2DFmode;
19667 /* NEG and ABS performed with SSE use bitwise mask operations.
19668 Create the appropriate mask now. */
19669 if (use_sse)
19670 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19671 else
19672 mask = NULL_RTX;
19674 dst = operands[0];
19675 src = operands[1];
19677 set = gen_rtx_fmt_e (code, mode, src);
19678 set = gen_rtx_SET (dst, set);
19680 if (mask)
19682 rtx use, clob;
19683 rtvec par;
19685 use = gen_rtx_USE (VOIDmode, mask);
19686 if (vector_mode)
19687 par = gen_rtvec (2, set, use);
19688 else
19690 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19691 par = gen_rtvec (3, set, use, clob);
19693 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19695 else
19696 emit_insn (set);
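/* Editorial scalar sketch of what the masked SSE forms compute (helper name
   illustrative; <string.h> assumed via system.h): ABS clears the IEEE sign
   bit with the inverted mask, NEG flips it with the sign-bit mask.  */

static float
ix86_doc_fp_absneg_sketch (float x, bool is_abs)
{
  uint32_t bits, sign = 0x80000000u;
  memcpy (&bits, &x, sizeof bits);
  bits = is_abs ? (bits & ~sign) : (bits ^ sign);
  memcpy (&x, &bits, sizeof x);
  return x;
}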
19699 /* Expand a copysign operation. Special case operand 0 being a constant. */
19701 void
19702 ix86_expand_copysign (rtx operands[])
19704 machine_mode mode, vmode;
19705 rtx dest, op0, op1, mask, nmask;
19707 dest = operands[0];
19708 op0 = operands[1];
19709 op1 = operands[2];
19711 mode = GET_MODE (dest);
19713 if (mode == SFmode)
19714 vmode = V4SFmode;
19715 else if (mode == DFmode)
19716 vmode = V2DFmode;
19717 else
19718 vmode = mode;
19720 if (CONST_DOUBLE_P (op0))
19722 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19724 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19725 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19727 if (mode == SFmode || mode == DFmode)
19729 if (op0 == CONST0_RTX (mode))
19730 op0 = CONST0_RTX (vmode);
19731 else
19733 rtx v = ix86_build_const_vector (vmode, false, op0);
19735 op0 = force_reg (vmode, v);
19738 else if (op0 != CONST0_RTX (mode))
19739 op0 = force_reg (mode, op0);
19741 mask = ix86_build_signbit_mask (vmode, 0, 0);
19743 if (mode == SFmode)
19744 copysign_insn = gen_copysignsf3_const;
19745 else if (mode == DFmode)
19746 copysign_insn = gen_copysigndf3_const;
19747 else
19748 copysign_insn = gen_copysigntf3_const;
19750 emit_insn (copysign_insn (dest, op0, op1, mask));
19752 else
19754 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19756 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19757 mask = ix86_build_signbit_mask (vmode, 0, 0);
19759 if (mode == SFmode)
19760 copysign_insn = gen_copysignsf3_var;
19761 else if (mode == DFmode)
19762 copysign_insn = gen_copysigndf3_var;
19763 else
19764 copysign_insn = gen_copysigntf3_var;
19766 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19770 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19771 be a constant, and so has already been expanded into a vector constant. */
19773 void
19774 ix86_split_copysign_const (rtx operands[])
19776 machine_mode mode, vmode;
19777 rtx dest, op0, mask, x;
19779 dest = operands[0];
19780 op0 = operands[1];
19781 mask = operands[3];
19783 mode = GET_MODE (dest);
19784 vmode = GET_MODE (mask);
19786 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19787 x = gen_rtx_AND (vmode, dest, mask);
19788 emit_insn (gen_rtx_SET (dest, x));
19790 if (op0 != CONST0_RTX (vmode))
19792 x = gen_rtx_IOR (vmode, dest, op0);
19793 emit_insn (gen_rtx_SET (dest, x));
19797 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19798 so we have to do two masks. */
19800 void
19801 ix86_split_copysign_var (rtx operands[])
19803 machine_mode mode, vmode;
19804 rtx dest, scratch, op0, op1, mask, nmask, x;
19806 dest = operands[0];
19807 scratch = operands[1];
19808 op0 = operands[2];
19809 op1 = operands[3];
19810 nmask = operands[4];
19811 mask = operands[5];
19813 mode = GET_MODE (dest);
19814 vmode = GET_MODE (mask);
19816 if (rtx_equal_p (op0, op1))
19818 /* Shouldn't happen often (it's useless, obviously), but when it does
19819 we'd generate incorrect code if we continue below. */
19820 emit_move_insn (dest, op0);
19821 return;
19824 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19826 gcc_assert (REGNO (op1) == REGNO (scratch));
19828 x = gen_rtx_AND (vmode, scratch, mask);
19829 emit_insn (gen_rtx_SET (scratch, x));
19831 dest = mask;
19832 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19833 x = gen_rtx_NOT (vmode, dest);
19834 x = gen_rtx_AND (vmode, x, op0);
19835 emit_insn (gen_rtx_SET (dest, x));
19837 else
19839 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19841 x = gen_rtx_AND (vmode, scratch, mask);
19843 else /* alternative 2,4 */
19845 gcc_assert (REGNO (mask) == REGNO (scratch));
19846 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19847 x = gen_rtx_AND (vmode, scratch, op1);
19849 emit_insn (gen_rtx_SET (scratch, x));
19851 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19853 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19854 x = gen_rtx_AND (vmode, dest, nmask);
19856 else /* alternative 3,4 */
19858 gcc_assert (REGNO (nmask) == REGNO (dest));
19859 dest = nmask;
19860 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19861 x = gen_rtx_AND (vmode, dest, op0);
19863 emit_insn (gen_rtx_SET (dest, x));
19866 x = gen_rtx_IOR (vmode, dest, scratch);
19867 emit_insn (gen_rtx_SET (dest, x));
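/* Editorial scalar sketch of the mask arithmetic used by the copysign
   splitters above (helper name illustrative): keep the magnitude bits of the
   value operand with the inverted mask and the sign bit of the sign operand
   with the sign mask, then OR them together.  */

static float
ix86_doc_copysign_sketch (float mag, float sgn)
{
  uint32_t m, s, sign = 0x80000000u;
  memcpy (&m, &mag, sizeof m);
  memcpy (&s, &sgn, sizeof s);
  m = (m & ~sign) | (s & sign);
  memcpy (&mag, &m, sizeof mag);
  return mag;
}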
19870 /* Return TRUE or FALSE depending on whether the first SET in INSN
19871 has source and destination with matching CC modes, and whether the
19872 CC mode is at least as constrained as REQ_MODE. */
19874 bool
19875 ix86_match_ccmode (rtx insn, machine_mode req_mode)
19877 rtx set;
19878 machine_mode set_mode;
19880 set = PATTERN (insn);
19881 if (GET_CODE (set) == PARALLEL)
19882 set = XVECEXP (set, 0, 0);
19883 gcc_assert (GET_CODE (set) == SET);
19884 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19886 set_mode = GET_MODE (SET_DEST (set));
19887 switch (set_mode)
19889 case CCNOmode:
19890 if (req_mode != CCNOmode
19891 && (req_mode != CCmode
19892 || XEXP (SET_SRC (set), 1) != const0_rtx))
19893 return false;
19894 break;
19895 case CCmode:
19896 if (req_mode == CCGCmode)
19897 return false;
19898 /* FALLTHRU */
19899 case CCGCmode:
19900 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19901 return false;
19902 /* FALLTHRU */
19903 case CCGOCmode:
19904 if (req_mode == CCZmode)
19905 return false;
19906 /* FALLTHRU */
19907 case CCZmode:
19908 break;
19910 case CCAmode:
19911 case CCCmode:
19912 case CCOmode:
19913 case CCPmode:
19914 case CCSmode:
19915 if (set_mode != req_mode)
19916 return false;
19917 break;
19919 default:
19920 gcc_unreachable ();
19923 return GET_MODE (SET_SRC (set)) == set_mode;
19926 /* Generate insn patterns to do an integer compare of OPERANDS. */
19928 static rtx
19929 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19931 machine_mode cmpmode;
19932 rtx tmp, flags;
19934 cmpmode = SELECT_CC_MODE (code, op0, op1);
19935 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19937 /* This is very simple, but making the interface the same as in the
19938 FP case makes the rest of the code easier. */
19939 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19940 emit_insn (gen_rtx_SET (flags, tmp));
19942 /* Return the test that should be put into the flags user, i.e.
19943 the bcc, scc, or cmov instruction. */
19944 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19947 /* Figure out whether to use ordered or unordered fp comparisons.
19948 Return the appropriate mode to use. */
19950 machine_mode
19951 ix86_fp_compare_mode (enum rtx_code)
19953 /* ??? In order to make all comparisons reversible, we do all comparisons
19954 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19955 all forms of trapping and nontrapping comparisons, we can make inequality
19956 comparisons trapping again, since it results in better code when using
19957 FCOM based compares. */
19958 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19961 machine_mode
19962 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19964 machine_mode mode = GET_MODE (op0);
19966 if (SCALAR_FLOAT_MODE_P (mode))
19968 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19969 return ix86_fp_compare_mode (code);
19972 switch (code)
19974 /* Only zero flag is needed. */
19975 case EQ: /* ZF=0 */
19976 case NE: /* ZF!=0 */
19977 return CCZmode;
19978 /* Codes needing carry flag. */
19979 case GEU: /* CF=0 */
19980 case LTU: /* CF=1 */
19981 /* Detect overflow checks. They need just the carry flag. */
19982 if (GET_CODE (op0) == PLUS
19983 && rtx_equal_p (op1, XEXP (op0, 0)))
19984 return CCCmode;
19985 else
19986 return CCmode;
19987 case GTU: /* CF=0 & ZF=0 */
19988 case LEU: /* CF=1 | ZF=1 */
19989 return CCmode;
19990 /* Codes possibly doable only with sign flag when
19991 comparing against zero. */
19992 case GE: /* SF=OF or SF=0 */
19993 case LT: /* SF<>OF or SF=1 */
19994 if (op1 == const0_rtx)
19995 return CCGOCmode;
19996 else
19997 /* For other cases Carry flag is not required. */
19998 return CCGCmode;
19999 /* Codes doable only with the sign flag when comparing
20000 against zero, but we miss the jump instruction for it
20001 so we need to use relational tests against overflow,
20002 which thus needs to be zero. */
20003 case GT: /* ZF=0 & SF=OF */
20004 case LE: /* ZF=1 | SF<>OF */
20005 if (op1 == const0_rtx)
20006 return CCNOmode;
20007 else
20008 return CCGCmode;
20009 /* The strcmp pattern does a (use flags) and combine may ask us for the
20010 proper mode. */
20011 case USE:
20012 return CCmode;
20013 default:
20014 gcc_unreachable ();
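/* Editorial example for the CCCmode case above: an overflow check written as
   "if (a + b < a)" compares a PLUS against one of its own operands, so only
   the carry flag is needed and CCCmode is chosen.  */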
20018 /* Return the fixed registers used for condition codes. */
20020 static bool
20021 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
20023 *p1 = FLAGS_REG;
20024 *p2 = FPSR_REG;
20025 return true;
20028 /* If two condition code modes are compatible, return a condition code
20029 mode which is compatible with both. Otherwise, return
20030 VOIDmode. */
20032 static machine_mode
20033 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
20035 if (m1 == m2)
20036 return m1;
20038 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
20039 return VOIDmode;
20041 if ((m1 == CCGCmode && m2 == CCGOCmode)
20042 || (m1 == CCGOCmode && m2 == CCGCmode))
20043 return CCGCmode;
20045 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
20046 return m2;
20047 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
20048 return m1;
20050 switch (m1)
20052 default:
20053 gcc_unreachable ();
20055 case CCmode:
20056 case CCGCmode:
20057 case CCGOCmode:
20058 case CCNOmode:
20059 case CCAmode:
20060 case CCCmode:
20061 case CCOmode:
20062 case CCPmode:
20063 case CCSmode:
20064 case CCZmode:
20065 switch (m2)
20067 default:
20068 return VOIDmode;
20070 case CCmode:
20071 case CCGCmode:
20072 case CCGOCmode:
20073 case CCNOmode:
20074 case CCAmode:
20075 case CCCmode:
20076 case CCOmode:
20077 case CCPmode:
20078 case CCSmode:
20079 case CCZmode:
20080 return CCmode;
20083 case CCFPmode:
20084 case CCFPUmode:
20085 /* These are only compatible with themselves, which we already
20086 checked above. */
20087 return VOIDmode;
20092 /* Return a comparison we can do that is equivalent to
20093 swap_condition (code), apart possibly from orderedness.
20094 But never change the orderedness if TARGET_IEEE_FP, returning
20095 UNKNOWN in that case if necessary. */
20097 static enum rtx_code
20098 ix86_fp_swap_condition (enum rtx_code code)
20100 switch (code)
20102 case GT: /* GTU - CF=0 & ZF=0 */
20103 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20104 case GE: /* GEU - CF=0 */
20105 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20106 case UNLT: /* LTU - CF=1 */
20107 return TARGET_IEEE_FP ? UNKNOWN : GT;
20108 case UNLE: /* LEU - CF=1 | ZF=1 */
20109 return TARGET_IEEE_FP ? UNKNOWN : GE;
20110 default:
20111 return swap_condition (code);
20115 /* Return the cost of comparison CODE using the best strategy for performance.
20116 All of the following functions use the number of instructions as a cost metric.
20117 In the future this should be tweaked to compute bytes for optimize_size and
20118 take into account the performance of various instructions on various CPUs. */
20120 static int
20121 ix86_fp_comparison_cost (enum rtx_code code)
20123 int arith_cost;
20125 /* The cost of code using bit-twiddling on %ah. */
20126 switch (code)
20128 case UNLE:
20129 case UNLT:
20130 case LTGT:
20131 case GT:
20132 case GE:
20133 case UNORDERED:
20134 case ORDERED:
20135 case UNEQ:
20136 arith_cost = 4;
20137 break;
20138 case LT:
20139 case NE:
20140 case EQ:
20141 case UNGE:
20142 arith_cost = TARGET_IEEE_FP ? 5 : 4;
20143 break;
20144 case LE:
20145 case UNGT:
20146 arith_cost = TARGET_IEEE_FP ? 6 : 4;
20147 break;
20148 default:
20149 gcc_unreachable ();
20152 switch (ix86_fp_comparison_strategy (code))
20154 case IX86_FPCMP_COMI:
20155 return arith_cost > 4 ? 3 : 2;
20156 case IX86_FPCMP_SAHF:
20157 return arith_cost > 4 ? 4 : 3;
20158 default:
20159 return arith_cost;
20163 /* Return the strategy to use for floating-point. We assume that fcomi is always
20164 preferable where available, since that is also true when looking at size
20165 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
20167 enum ix86_fpcmp_strategy
20168 ix86_fp_comparison_strategy (enum rtx_code)
20170 /* Do fcomi/sahf based test when profitable. */
20172 if (TARGET_CMOVE)
20173 return IX86_FPCMP_COMI;
20175 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20176 return IX86_FPCMP_SAHF;
20178 return IX86_FPCMP_ARITH;
20181 /* Swap, force into registers, or otherwise massage the two operands
20182 to a fp comparison. The operands are updated in place; the new
20183 comparison code is returned. */
20185 static enum rtx_code
20186 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20188 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20189 rtx op0 = *pop0, op1 = *pop1;
20190 machine_mode op_mode = GET_MODE (op0);
20191 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20193 /* All of the unordered compare instructions only work on registers.
20194 The same is true of the fcomi compare instructions. The XFmode
20195 compare instructions require registers except when comparing
20196 against zero or when converting operand 1 from fixed point to
20197 floating point. */
20199 if (!is_sse
20200 && (fpcmp_mode == CCFPUmode
20201 || (op_mode == XFmode
20202 && ! (standard_80387_constant_p (op0) == 1
20203 || standard_80387_constant_p (op1) == 1)
20204 && GET_CODE (op1) != FLOAT)
20205 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20207 op0 = force_reg (op_mode, op0);
20208 op1 = force_reg (op_mode, op1);
20210 else
20212 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
20213 things around if they appear profitable, otherwise force op0
20214 into a register. */
20216 if (standard_80387_constant_p (op0) == 0
20217 || (MEM_P (op0)
20218 && ! (standard_80387_constant_p (op1) == 0
20219 || MEM_P (op1))))
20221 enum rtx_code new_code = ix86_fp_swap_condition (code);
20222 if (new_code != UNKNOWN)
20224 std::swap (op0, op1);
20225 code = new_code;
20229 if (!REG_P (op0))
20230 op0 = force_reg (op_mode, op0);
20232 if (CONSTANT_P (op1))
20234 int tmp = standard_80387_constant_p (op1);
20235 if (tmp == 0)
20236 op1 = validize_mem (force_const_mem (op_mode, op1));
20237 else if (tmp == 1)
20239 if (TARGET_CMOVE)
20240 op1 = force_reg (op_mode, op1);
20242 else
20243 op1 = force_reg (op_mode, op1);
20247 /* Try to rearrange the comparison to make it cheaper. */
20248 if (ix86_fp_comparison_cost (code)
20249 > ix86_fp_comparison_cost (swap_condition (code))
20250 && (REG_P (op1) || can_create_pseudo_p ()))
20252 std::swap (op0, op1);
20253 code = swap_condition (code);
20254 if (!REG_P (op0))
20255 op0 = force_reg (op_mode, op0);
20258 *pop0 = op0;
20259 *pop1 = op1;
20260 return code;
20263 /* Convert comparison codes we use to represent FP comparison to integer
20264 code that will result in proper branch. Return UNKNOWN if no such code
20265 is available. */
20267 enum rtx_code
20268 ix86_fp_compare_code_to_integer (enum rtx_code code)
20270 switch (code)
20272 case GT:
20273 return GTU;
20274 case GE:
20275 return GEU;
20276 case ORDERED:
20277 case UNORDERED:
20278 return code;
20279 break;
20280 case UNEQ:
20281 return EQ;
20282 break;
20283 case UNLT:
20284 return LTU;
20285 break;
20286 case UNLE:
20287 return LEU;
20288 break;
20289 case LTGT:
20290 return NE;
20291 break;
20292 default:
20293 return UNKNOWN;
20297 /* Generate insn patterns to do a floating point compare of OPERANDS. */
20299 static rtx
20300 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20302 machine_mode fpcmp_mode, intcmp_mode;
20303 rtx tmp, tmp2;
20305 fpcmp_mode = ix86_fp_compare_mode (code);
20306 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20308 /* Do fcomi/sahf based test when profitable. */
20309 switch (ix86_fp_comparison_strategy (code))
20311 case IX86_FPCMP_COMI:
20312 intcmp_mode = fpcmp_mode;
20313 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20314 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20315 emit_insn (tmp);
20316 break;
20318 case IX86_FPCMP_SAHF:
20319 intcmp_mode = fpcmp_mode;
20320 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20321 tmp = gen_rtx_SET (gen_rtx_REG (fpcmp_mode, FLAGS_REG), tmp);
20323 if (!scratch)
20324 scratch = gen_reg_rtx (HImode);
20325 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20326 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20327 break;
20329 case IX86_FPCMP_ARITH:
20330 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
20331 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20332 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20333 if (!scratch)
20334 scratch = gen_reg_rtx (HImode);
20335 emit_insn (gen_rtx_SET (scratch, tmp2));
20337 /* In the unordered case, we have to check C2 for NaNs, which
20338 doesn't happen to work out to anything nice combination-wise.
20339 So do some bit twiddling on the value we've got in AH to come
20340 up with an appropriate set of condition codes. */
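/* Editorial note on the masks used below: after fnstsw, the x87 condition
   bits land in %ah as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so 0x45 tests
   C3|C2|C0, 0x05 tests C2|C0 and 0x40 tests C3; C2 is the bit set for
   unordered (NaN) results.  */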
20342 intcmp_mode = CCNOmode;
20343 switch (code)
20345 case GT:
20346 case UNGT:
20347 if (code == GT || !TARGET_IEEE_FP)
20349 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20350 code = EQ;
20352 else
20354 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20355 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20356 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20357 intcmp_mode = CCmode;
20358 code = GEU;
20360 break;
20361 case LT:
20362 case UNLT:
20363 if (code == LT && TARGET_IEEE_FP)
20365 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20366 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20367 intcmp_mode = CCmode;
20368 code = EQ;
20370 else
20372 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20373 code = NE;
20375 break;
20376 case GE:
20377 case UNGE:
20378 if (code == GE || !TARGET_IEEE_FP)
20380 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20381 code = EQ;
20383 else
20385 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20386 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20387 code = NE;
20389 break;
20390 case LE:
20391 case UNLE:
20392 if (code == LE && TARGET_IEEE_FP)
20394 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20395 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20396 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20397 intcmp_mode = CCmode;
20398 code = LTU;
20400 else
20402 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20403 code = NE;
20405 break;
20406 case EQ:
20407 case UNEQ:
20408 if (code == EQ && TARGET_IEEE_FP)
20410 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20411 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20412 intcmp_mode = CCmode;
20413 code = EQ;
20415 else
20417 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20418 code = NE;
20420 break;
20421 case NE:
20422 case LTGT:
20423 if (code == NE && TARGET_IEEE_FP)
20425 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20426 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20427 GEN_INT (0x40)));
20428 code = NE;
20430 else
20432 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20433 code = EQ;
20435 break;
20437 case UNORDERED:
20438 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20439 code = NE;
20440 break;
20441 case ORDERED:
20442 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20443 code = EQ;
20444 break;
20446 default:
20447 gcc_unreachable ();
20449 break;
20451 default:
20452 gcc_unreachable ();
20455 /* Return the test that should be put into the flags user, i.e.
20456 the bcc, scc, or cmov instruction. */
20457 return gen_rtx_fmt_ee (code, VOIDmode,
20458 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20459 const0_rtx);
20462 static rtx
20463 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20465 rtx ret;
20467 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20468 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20470 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20472 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20473 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20475 else
20476 ret = ix86_expand_int_compare (code, op0, op1);
20478 return ret;
20481 void
20482 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20484 machine_mode mode = GET_MODE (op0);
20485 rtx tmp;
20487 switch (mode)
20489 case SFmode:
20490 case DFmode:
20491 case XFmode:
20492 case QImode:
20493 case HImode:
20494 case SImode:
20495 simple:
20496 tmp = ix86_expand_compare (code, op0, op1);
20497 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20498 gen_rtx_LABEL_REF (VOIDmode, label),
20499 pc_rtx);
20500 emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
20501 return;
20503 case DImode:
20504 if (TARGET_64BIT)
20505 goto simple;
20506 case TImode:
20507 /* Expand DImode branch into multiple compare+branch. */
20509 rtx lo[2], hi[2];
20510 rtx_code_label *label2;
20511 enum rtx_code code1, code2, code3;
20512 machine_mode submode;
20514 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20516 std::swap (op0, op1);
20517 code = swap_condition (code);
20520 split_double_mode (mode, &op0, 1, lo+0, hi+0);
20521 split_double_mode (mode, &op1, 1, lo+1, hi+1);
20523 submode = mode == DImode ? SImode : DImode;
20525 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20526 avoid two branches. This costs one extra insn, so disable when
20527 optimizing for size. */
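/* Editorial scalar form of the trick: a double-word equality a == b holds
   exactly when ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0, so one OR result
   feeds a single compare-and-branch instead of two. */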
20529 if ((code == EQ || code == NE)
20530 && (!optimize_insn_for_size_p ()
20531 || hi[1] == const0_rtx || lo[1] == const0_rtx))
20533 rtx xor0, xor1;
20535 xor1 = hi[0];
20536 if (hi[1] != const0_rtx)
20537 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20538 NULL_RTX, 0, OPTAB_WIDEN);
20540 xor0 = lo[0];
20541 if (lo[1] != const0_rtx)
20542 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20543 NULL_RTX, 0, OPTAB_WIDEN);
20545 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20546 NULL_RTX, 0, OPTAB_WIDEN);
20548 ix86_expand_branch (code, tmp, const0_rtx, label);
20549 return;
20552 /* Otherwise, if we are doing less-than or greater-or-equal-than,
20553 op1 is a constant and the low word is zero, then we can just
20554 examine the high word. Similarly for low word -1 and
20555 less-or-equal-than or greater-than. */
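/* Editorial example: for an unsigned "x < 0x500000000" the low word of the
   constant is zero, so x < C holds iff hi(x) < hi(C) and only the high
   words need to be compared. */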
20557 if (CONST_INT_P (hi[1]))
20558 switch (code)
20560 case LT: case LTU: case GE: case GEU:
20561 if (lo[1] == const0_rtx)
20563 ix86_expand_branch (code, hi[0], hi[1], label);
20564 return;
20566 break;
20567 case LE: case LEU: case GT: case GTU:
20568 if (lo[1] == constm1_rtx)
20570 ix86_expand_branch (code, hi[0], hi[1], label);
20571 return;
20573 break;
20574 default:
20575 break;
20578 /* Otherwise, we need two or three jumps. */
20580 label2 = gen_label_rtx ();
20582 code1 = code;
20583 code2 = swap_condition (code);
20584 code3 = unsigned_condition (code);
20586 switch (code)
20588 case LT: case GT: case LTU: case GTU:
20589 break;
20591 case LE: code1 = LT; code2 = GT; break;
20592 case GE: code1 = GT; code2 = LT; break;
20593 case LEU: code1 = LTU; code2 = GTU; break;
20594 case GEU: code1 = GTU; code2 = LTU; break;
20596 case EQ: code1 = UNKNOWN; code2 = NE; break;
20597 case NE: code2 = UNKNOWN; break;
20599 default:
20600 gcc_unreachable ();
20604 * a < b =>
20605 * if (hi(a) < hi(b)) goto true;
20606 * if (hi(a) > hi(b)) goto false;
20607 * if (lo(a) < lo(b)) goto true;
20608 * false:
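*
* As a concrete sketch, "a <= b" picks code1 = LT, code2 = GT and
* code3 = LEU, giving:
* if (hi(a) < hi(b)) goto true;
* if (hi(a) > hi(b)) goto false;
* if (lo(a) <= lo(b)) goto true;   (unsigned compare)
* false: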
20611 if (code1 != UNKNOWN)
20612 ix86_expand_branch (code1, hi[0], hi[1], label);
20613 if (code2 != UNKNOWN)
20614 ix86_expand_branch (code2, hi[0], hi[1], label2);
20616 ix86_expand_branch (code3, lo[0], lo[1], label);
20618 if (code2 != UNKNOWN)
20619 emit_label (label2);
20620 return;
20623 default:
20624 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20625 goto simple;
20629 /* Split branch based on floating point condition. */
20630 void
20631 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20632 rtx target1, rtx target2, rtx tmp)
20634 rtx condition;
20635 rtx i;
20637 if (target2 != pc_rtx)
20639 std::swap (target1, target2);
20640 code = reverse_condition_maybe_unordered (code);
20643 condition = ix86_expand_fp_compare (code, op1, op2,
20644 tmp);
20646 i = emit_jump_insn (gen_rtx_SET
20647 (pc_rtx,
20648 gen_rtx_IF_THEN_ELSE (VOIDmode,
20649 condition, target1, target2)));
20650 if (split_branch_probability >= 0)
20651 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20654 void
20655 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20657 rtx ret;
20659 gcc_assert (GET_MODE (dest) == QImode);
20661 ret = ix86_expand_compare (code, op0, op1);
20662 PUT_MODE (ret, QImode);
20663 emit_insn (gen_rtx_SET (dest, ret));
20666 /* Expand a comparison setting or clearing the carry flag. Return true
20667 when successful and set *POP to the comparison operation. */
20668 static bool
20669 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20671 machine_mode mode =
20672 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20674 /* Do not handle double-mode compares that go through the special path. */
20675 if (mode == (TARGET_64BIT ? TImode : DImode))
20676 return false;
20678 if (SCALAR_FLOAT_MODE_P (mode))
20680 rtx compare_op;
20681 rtx_insn *compare_seq;
20683 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20685 /* Shortcut: the following common codes never translate
20686 into carry flag compares. */
20687 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20688 || code == ORDERED || code == UNORDERED)
20689 return false;
20691 /* These comparisons require the zero flag; swap operands so they won't. */
20692 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20693 && !TARGET_IEEE_FP)
20695 std::swap (op0, op1);
20696 code = swap_condition (code);
20699 /* Try to expand the comparison and verify that we end up with
20700 a carry flag based comparison. This fails only when we decide
20701 to expand the comparison using arithmetic, which is not a
20702 common scenario. */
20703 start_sequence ();
20704 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20705 compare_seq = get_insns ();
20706 end_sequence ();
20708 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20709 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20710 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20711 else
20712 code = GET_CODE (compare_op);
20714 if (code != LTU && code != GEU)
20715 return false;
20717 emit_insn (compare_seq);
20718 *pop = compare_op;
20719 return true;
20722 if (!INTEGRAL_MODE_P (mode))
20723 return false;
20725 switch (code)
20727 case LTU:
20728 case GEU:
20729 break;
20731 /* Convert a==0 into (unsigned)a<1. */
20732 case EQ:
20733 case NE:
20734 if (op1 != const0_rtx)
20735 return false;
20736 op1 = const1_rtx;
20737 code = (code == EQ ? LTU : GEU);
20738 break;
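/* E.g. "x == 0" becomes "(unsigned) x < 1", i.e. exactly the carry
   flag of "cmp $1, x"; "x != 0" likewise becomes "(unsigned) x >= 1".  */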
20740 /* Convert a>b into b<a or a>=b+1. */
20741 case GTU:
20742 case LEU:
20743 if (CONST_INT_P (op1))
20745 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20746 /* Bail out on overflow. We could still swap the operands, but
20747 that would force loading the constant into a register. */
20748 if (op1 == const0_rtx
20749 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20750 return false;
20751 code = (code == GTU ? GEU : LTU);
20753 else
20755 std::swap (op0, op1);
20756 code = (code == GTU ? LTU : GEU);
20758 break;
20760 /* Convert a>=0 into (unsigned)a<0x80000000. */
20761 case LT:
20762 case GE:
20763 if (mode == DImode || op1 != const0_rtx)
20764 return false;
20765 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20766 code = (code == LT ? GEU : LTU);
20767 break;
20768 case LE:
20769 case GT:
20770 if (mode == DImode || op1 != constm1_rtx)
20771 return false;
20772 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20773 code = (code == LE ? GEU : LTU);
20774 break;
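/* E.g. in SImode "x >= 0" becomes "(unsigned) x < 0x80000000" and
   "x < 0" becomes "(unsigned) x >= 0x80000000"; "x > -1" and
   "x <= -1" map onto the same two unsigned tests.  */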
20776 default:
20777 return false;
20779 /* Swapping operands may cause constant to appear as first operand. */
20780 if (!nonimmediate_operand (op0, VOIDmode))
20782 if (!can_create_pseudo_p ())
20783 return false;
20784 op0 = force_reg (mode, op0);
20786 *pop = ix86_expand_compare (code, op0, op1);
20787 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20788 return true;
20791 bool
20792 ix86_expand_int_movcc (rtx operands[])
20794 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20795 rtx_insn *compare_seq;
20796 rtx compare_op;
20797 machine_mode mode = GET_MODE (operands[0]);
20798 bool sign_bit_compare_p = false;
20799 rtx op0 = XEXP (operands[1], 0);
20800 rtx op1 = XEXP (operands[1], 1);
20802 if (GET_MODE (op0) == TImode
20803 || (GET_MODE (op0) == DImode
20804 && !TARGET_64BIT))
20805 return false;
20807 start_sequence ();
20808 compare_op = ix86_expand_compare (code, op0, op1);
20809 compare_seq = get_insns ();
20810 end_sequence ();
20812 compare_code = GET_CODE (compare_op);
20814 if ((op1 == const0_rtx && (code == GE || code == LT))
20815 || (op1 == constm1_rtx && (code == GT || code == LE)))
20816 sign_bit_compare_p = true;
20818 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20819 HImode insns, we'd be swallowed in word prefix ops. */
20821 if ((mode != HImode || TARGET_FAST_PREFIX)
20822 && (mode != (TARGET_64BIT ? TImode : DImode))
20823 && CONST_INT_P (operands[2])
20824 && CONST_INT_P (operands[3]))
20826 rtx out = operands[0];
20827 HOST_WIDE_INT ct = INTVAL (operands[2]);
20828 HOST_WIDE_INT cf = INTVAL (operands[3]);
20829 HOST_WIDE_INT diff;
20831 diff = ct - cf;
20832 /* Sign bit compares are better done using shifts than by using
20833 sbb. */
20834 if (sign_bit_compare_p
20835 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20837 /* Detect overlap between destination and compare sources. */
20838 rtx tmp = out;
20840 if (!sign_bit_compare_p)
20842 rtx flags;
20843 bool fpcmp = false;
20845 compare_code = GET_CODE (compare_op);
20847 flags = XEXP (compare_op, 0);
20849 if (GET_MODE (flags) == CCFPmode
20850 || GET_MODE (flags) == CCFPUmode)
20852 fpcmp = true;
20853 compare_code
20854 = ix86_fp_compare_code_to_integer (compare_code);
20857 /* To simplify the rest of the code, restrict to the GEU case. */
20858 if (compare_code == LTU)
20860 std::swap (ct, cf);
20861 compare_code = reverse_condition (compare_code);
20862 code = reverse_condition (code);
20864 else
20866 if (fpcmp)
20867 PUT_CODE (compare_op,
20868 reverse_condition_maybe_unordered
20869 (GET_CODE (compare_op)));
20870 else
20871 PUT_CODE (compare_op,
20872 reverse_condition (GET_CODE (compare_op)));
20874 diff = ct - cf;
20876 if (reg_overlap_mentioned_p (out, op0)
20877 || reg_overlap_mentioned_p (out, op1))
20878 tmp = gen_reg_rtx (mode);
20880 if (mode == DImode)
20881 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20882 else
20883 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20884 flags, compare_op));
20886 else
20888 if (code == GT || code == GE)
20889 code = reverse_condition (code);
20890 else
20892 std::swap (ct, cf);
20893 diff = ct - cf;
20895 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20898 if (diff == 1)
20901 * cmpl op0,op1
20902 * sbbl dest,dest
20903 * [addl dest, ct]
20905 * Size 5 - 8.
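*
* ("sbbl dest,dest" computes dest - dest - CF, i.e. it broadcasts the
* carry flag into -1 or 0, so the select is entirely branch-free;
* this is only a sketch of the shape of the emitted sequence.)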
20907 if (ct)
20908 tmp = expand_simple_binop (mode, PLUS,
20909 tmp, GEN_INT (ct),
20910 copy_rtx (tmp), 1, OPTAB_DIRECT);
20912 else if (cf == -1)
20915 * cmpl op0,op1
20916 * sbbl dest,dest
20917 * orl $ct, dest
20919 * Size 8.
20921 tmp = expand_simple_binop (mode, IOR,
20922 tmp, GEN_INT (ct),
20923 copy_rtx (tmp), 1, OPTAB_DIRECT);
20925 else if (diff == -1 && ct)
20928 * cmpl op0,op1
20929 * sbbl dest,dest
20930 * notl dest
20931 * [addl dest, cf]
20933 * Size 8 - 11.
20935 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20936 if (cf)
20937 tmp = expand_simple_binop (mode, PLUS,
20938 copy_rtx (tmp), GEN_INT (cf),
20939 copy_rtx (tmp), 1, OPTAB_DIRECT);
20941 else
20944 * cmpl op0,op1
20945 * sbbl dest,dest
20946 * [notl dest]
20947 * andl cf - ct, dest
20948 * [addl dest, ct]
20950 * Size 8 - 11.
20953 if (cf == 0)
20955 cf = ct;
20956 ct = 0;
20957 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20960 tmp = expand_simple_binop (mode, AND,
20961 copy_rtx (tmp),
20962 gen_int_mode (cf - ct, mode),
20963 copy_rtx (tmp), 1, OPTAB_DIRECT);
20964 if (ct)
20965 tmp = expand_simple_binop (mode, PLUS,
20966 copy_rtx (tmp), GEN_INT (ct),
20967 copy_rtx (tmp), 1, OPTAB_DIRECT);
20970 if (!rtx_equal_p (tmp, out))
20971 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20973 return true;
20976 if (diff < 0)
20978 machine_mode cmp_mode = GET_MODE (op0);
20979 enum rtx_code new_code;
20981 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20983 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20985 /* We may be reversing an unordered compare to a normal compare, which
20986 is not valid in general (we may convert a non-trapping condition
20987 into a trapping one); however, on i386 we currently emit all
20988 comparisons unordered. */
20989 new_code = reverse_condition_maybe_unordered (code);
20991 else
20992 new_code = ix86_reverse_condition (code, cmp_mode);
20993 if (new_code != UNKNOWN)
20995 std::swap (ct, cf);
20996 diff = -diff;
20997 code = new_code;
21001 compare_code = UNKNOWN;
21002 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
21003 && CONST_INT_P (op1))
21005 if (op1 == const0_rtx
21006 && (code == LT || code == GE))
21007 compare_code = code;
21008 else if (op1 == constm1_rtx)
21010 if (code == LE)
21011 compare_code = LT;
21012 else if (code == GT)
21013 compare_code = GE;
21017 /* Optimize dest = (op0 < 0) ? -1 : cf. */
21018 if (compare_code != UNKNOWN
21019 && GET_MODE (op0) == GET_MODE (out)
21020 && (cf == -1 || ct == -1))
21022 /* If lea code below could be used, only optimize
21023 if it results in a 2 insn sequence. */
21025 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
21026 || diff == 3 || diff == 5 || diff == 9)
21027 || (compare_code == LT && ct == -1)
21028 || (compare_code == GE && cf == -1))
21031 * notl op1 (if necessary)
21032 * sarl $31, op1
21033 * orl cf, op1
21035 if (ct != -1)
21037 cf = ct;
21038 ct = -1;
21039 code = reverse_condition (code);
21042 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21044 out = expand_simple_binop (mode, IOR,
21045 out, GEN_INT (cf),
21046 out, 1, OPTAB_DIRECT);
21047 if (out != operands[0])
21048 emit_move_insn (operands[0], out);
21050 return true;
21055 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21056 || diff == 3 || diff == 5 || diff == 9)
21057 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21058 && (mode != DImode
21059 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21062 * xorl dest,dest
21063 * cmpl op1,op2
21064 * setcc dest
21065 * lea cf(dest*(ct-cf)),dest
21067 * Size 14.
21069 * This also catches the degenerate setcc-only case.
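*
* E.g. "x < y ? 7 : 3" has ct - cf == 4, giving roughly
* xorl dest,dest
* cmpl y,x
* setl dest
* leal 3(,dest,4),dest
* (an illustrative sketch only; the real operands depend on the mode).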
21072 rtx tmp;
21073 int nops;
21075 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21077 nops = 0;
21078 /* On x86_64 the lea instruction operates on Pmode, so we need
21079 to get the arithmetic done in the proper mode to match. */
21080 if (diff == 1)
21081 tmp = copy_rtx (out);
21082 else
21084 rtx out1;
21085 out1 = copy_rtx (out);
21086 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21087 nops++;
21088 if (diff & 1)
21090 tmp = gen_rtx_PLUS (mode, tmp, out1);
21091 nops++;
21094 if (cf != 0)
21096 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21097 nops++;
21099 if (!rtx_equal_p (tmp, out))
21101 if (nops == 1)
21102 out = force_operand (tmp, copy_rtx (out));
21103 else
21104 emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp)));
21106 if (!rtx_equal_p (out, operands[0]))
21107 emit_move_insn (operands[0], copy_rtx (out));
21109 return true;
21113 * General case: Jumpful:
21114 * xorl dest,dest cmpl op1, op2
21115 * cmpl op1, op2 movl ct, dest
21116 * setcc dest jcc 1f
21117 * decl dest movl cf, dest
21118 * andl (cf-ct),dest 1:
21119 * addl ct,dest
21121 * Size 20. Size 14.
21123 * This is reasonably steep, but branch mispredict costs are
21124 * high on modern cpus, so consider failing only if optimizing
21125 * for space.
21128 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21129 && BRANCH_COST (optimize_insn_for_speed_p (),
21130 false) >= 2)
21132 if (cf == 0)
21134 machine_mode cmp_mode = GET_MODE (op0);
21135 enum rtx_code new_code;
21137 if (SCALAR_FLOAT_MODE_P (cmp_mode))
21139 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21141 /* We may be reversing an unordered compare to a normal compare,
21142 which is not valid in general (we may convert a non-trapping
21143 condition into a trapping one); however, on i386 we currently
21144 emit all comparisons unordered. */
21145 new_code = reverse_condition_maybe_unordered (code);
21147 else
21149 new_code = ix86_reverse_condition (code, cmp_mode);
21150 if (compare_code != UNKNOWN && new_code != UNKNOWN)
21151 compare_code = reverse_condition (compare_code);
21154 if (new_code != UNKNOWN)
21156 cf = ct;
21157 ct = 0;
21158 code = new_code;
21162 if (compare_code != UNKNOWN)
21164 /* notl op1 (if needed)
21165 sarl $31, op1
21166 andl (cf-ct), op1
21167 addl ct, op1
21169 For x < 0 (resp. x <= -1) there will be no notl,
21170 so if possible swap the constants to get rid of the
21171 complement.
21172 True/false will be -1/0 while code below (store flag
21173 followed by decrement) is 0/-1, so the constants need
21174 to be exchanged once more. */
21176 if (compare_code == GE || !cf)
21178 code = reverse_condition (code);
21179 compare_code = LT;
21181 else
21182 std::swap (ct, cf);
21184 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21186 else
21188 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21190 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21191 constm1_rtx,
21192 copy_rtx (out), 1, OPTAB_DIRECT);
21195 out = expand_simple_binop (mode, AND, copy_rtx (out),
21196 gen_int_mode (cf - ct, mode),
21197 copy_rtx (out), 1, OPTAB_DIRECT);
21198 if (ct)
21199 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21200 copy_rtx (out), 1, OPTAB_DIRECT);
21201 if (!rtx_equal_p (out, operands[0]))
21202 emit_move_insn (operands[0], copy_rtx (out));
21204 return true;
21208 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21210 /* Try a few things more with specific constants and a variable. */
21212 optab op;
21213 rtx var, orig_out, out, tmp;
21215 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21216 return false;
21218 /* If one of the two operands is an interesting constant, load a
21219 constant with the above and mask it in with a logical operation. */
21221 if (CONST_INT_P (operands[2]))
21223 var = operands[3];
21224 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21225 operands[3] = constm1_rtx, op = and_optab;
21226 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21227 operands[3] = const0_rtx, op = ior_optab;
21228 else
21229 return false;
21231 else if (CONST_INT_P (operands[3]))
21233 var = operands[2];
21234 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21235 operands[2] = constm1_rtx, op = and_optab;
21236 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21237 operands[2] = const0_rtx, op = ior_optab;
21238 else
21239 return false;
21241 else
21242 return false;
21244 orig_out = operands[0];
21245 tmp = gen_reg_rtx (mode);
21246 operands[0] = tmp;
21248 /* Recurse to get the constant loaded. */
21249 if (ix86_expand_int_movcc (operands) == 0)
21250 return false;
21252 /* Mask in the interesting variable. */
21253 out = expand_binop (mode, op, var, tmp, orig_out, 0,
21254 OPTAB_WIDEN);
21255 if (!rtx_equal_p (out, orig_out))
21256 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21258 return true;
21262 * For comparison with above,
21264 * movl cf,dest
21265 * movl ct,tmp
21266 * cmpl op1,op2
21267 * cmovcc tmp,dest
21269 * Size 15.
21272 if (! nonimmediate_operand (operands[2], mode))
21273 operands[2] = force_reg (mode, operands[2]);
21274 if (! nonimmediate_operand (operands[3], mode))
21275 operands[3] = force_reg (mode, operands[3]);
21277 if (! register_operand (operands[2], VOIDmode)
21278 && (mode == QImode
21279 || ! register_operand (operands[3], VOIDmode)))
21280 operands[2] = force_reg (mode, operands[2]);
21282 if (mode == QImode
21283 && ! register_operand (operands[3], VOIDmode))
21284 operands[3] = force_reg (mode, operands[3]);
21286 emit_insn (compare_seq);
21287 emit_insn (gen_rtx_SET (operands[0],
21288 gen_rtx_IF_THEN_ELSE (mode,
21289 compare_op, operands[2],
21290 operands[3])));
21291 return true;
21294 /* Swap, force into registers, or otherwise massage the two operands
21295 to an sse comparison with a mask result. Thus we differ a bit from
21296 ix86_prepare_fp_compare_args which expects to produce a flags result.
21298 The DEST operand exists to help determine whether to commute commutative
21299 operators. The POP0/POP1 operands are updated in place. The new
21300 comparison code is returned, or UNKNOWN if not implementable. */
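/* For instance, GT/GE/UNLT/UNLE have no direct SSE encoding before AVX,
   so "a > b" is rewritten here as "b < a" by swapping *POP0/*POP1 and
   the condition (an illustrative note, not an exhaustive list).  */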
21302 static enum rtx_code
21303 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21304 rtx *pop0, rtx *pop1)
21306 switch (code)
21308 case LTGT:
21309 case UNEQ:
21310 /* AVX supports all the needed comparisons. */
21311 if (TARGET_AVX)
21312 break;
21313 /* We have no LTGT as an operator. We could implement it with
21314 NE & ORDERED, but this requires an extra temporary. It's
21315 not clear that it's worth it. */
21316 return UNKNOWN;
21318 case LT:
21319 case LE:
21320 case UNGT:
21321 case UNGE:
21322 /* These are supported directly. */
21323 break;
21325 case EQ:
21326 case NE:
21327 case UNORDERED:
21328 case ORDERED:
21329 /* AVX has 3 operand comparisons, no need to swap anything. */
21330 if (TARGET_AVX)
21331 break;
21332 /* For commutative operators, try to canonicalize the destination
21333 operand to be first in the comparison - this helps reload to
21334 avoid extra moves. */
21335 if (!dest || !rtx_equal_p (dest, *pop1))
21336 break;
21337 /* FALLTHRU */
21339 case GE:
21340 case GT:
21341 case UNLE:
21342 case UNLT:
21343 /* These are not supported directly before AVX, and furthermore
21344 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
21345 comparison operands to transform into something that is
21346 supported. */
21347 std::swap (*pop0, *pop1);
21348 code = swap_condition (code);
21349 break;
21351 default:
21352 gcc_unreachable ();
21355 return code;
21358 /* Detect conditional moves that exactly match min/max operational
21359 semantics. Note that this is IEEE safe, as long as we don't
21360 interchange the operands.
21362 Returns FALSE if this conditional move doesn't match a MIN/MAX,
21363 and TRUE if the operation is successful and instructions are emitted. */
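/* For instance, "dest = (a < b) ? a : b" can map to a single minss/minsd
   and "dest = (a < b) ? b : a" to maxss/maxsd; the operand order is kept
   as written so the NaN and signed-zero behaviour of min/max is preserved
   (an illustrative pairing, assuming scalar SFmode/DFmode operands).  */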
21365 static bool
21366 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21367 rtx cmp_op1, rtx if_true, rtx if_false)
21369 machine_mode mode;
21370 bool is_min;
21371 rtx tmp;
21373 if (code == LT)
21375 else if (code == UNGE)
21376 std::swap (if_true, if_false);
21377 else
21378 return false;
21380 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21381 is_min = true;
21382 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21383 is_min = false;
21384 else
21385 return false;
21387 mode = GET_MODE (dest);
21389 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21390 but MODE may be a vector mode and thus not appropriate. */
21391 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21393 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21394 rtvec v;
21396 if_true = force_reg (mode, if_true);
21397 v = gen_rtvec (2, if_true, if_false);
21398 tmp = gen_rtx_UNSPEC (mode, v, u);
21400 else
21402 code = is_min ? SMIN : SMAX;
21403 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21406 emit_insn (gen_rtx_SET (dest, tmp));
21407 return true;
21410 /* Expand an sse vector comparison. Return the register with the result. */
21412 static rtx
21413 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21414 rtx op_true, rtx op_false)
21416 machine_mode mode = GET_MODE (dest);
21417 machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21419 /* In the general case the result of the comparison can differ from the operands' type. */
21420 machine_mode cmp_mode;
21422 /* In AVX512F the result of comparison is an integer mask. */
21423 bool maskcmp = false;
21424 rtx x;
21426 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21428 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21429 gcc_assert (cmp_mode != BLKmode);
21431 maskcmp = true;
21433 else
21434 cmp_mode = cmp_ops_mode;
21437 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21438 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21439 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21441 if (optimize
21442 || reg_overlap_mentioned_p (dest, op_true)
21443 || reg_overlap_mentioned_p (dest, op_false))
21444 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21446 /* Compare patterns for int modes are unspec in AVX512F only. */
21447 if (maskcmp && (code == GT || code == EQ))
21449 rtx (*gen)(rtx, rtx, rtx);
21451 switch (cmp_ops_mode)
21453 case V64QImode:
21454 gcc_assert (TARGET_AVX512BW);
21455 gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21456 break;
21457 case V32HImode:
21458 gcc_assert (TARGET_AVX512BW);
21459 gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21460 break;
21461 case V16SImode:
21462 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21463 break;
21464 case V8DImode:
21465 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21466 break;
21467 default:
21468 gen = NULL;
21471 if (gen)
21473 emit_insn (gen (dest, cmp_op0, cmp_op1));
21474 return dest;
21477 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21479 if (cmp_mode != mode && !maskcmp)
21481 x = force_reg (cmp_ops_mode, x);
21482 convert_move (dest, x, false);
21484 else
21485 emit_insn (gen_rtx_SET (dest, x));
21487 return dest;
21490 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21491 operations. This is used for both scalar and vector conditional moves. */
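/* In the generic fallback the select is computed as
     dest = (cmp & op_true) | (~cmp & op_false)
   relying on CMP being an all-ones / all-zeros element mask; the cases
   below avoid some of these operations when OP_TRUE or OP_FALSE is a
   constant 0 or -1, or when a blend instruction is available.  */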
21493 static void
21494 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21496 machine_mode mode = GET_MODE (dest);
21497 machine_mode cmpmode = GET_MODE (cmp);
21499 /* In AVX512F the result of comparison is an integer mask. */
21500 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21502 rtx t2, t3, x;
21504 if (vector_all_ones_operand (op_true, mode)
21505 && rtx_equal_p (op_false, CONST0_RTX (mode))
21506 && !maskcmp)
21508 emit_insn (gen_rtx_SET (dest, cmp));
21510 else if (op_false == CONST0_RTX (mode)
21511 && !maskcmp)
21513 op_true = force_reg (mode, op_true);
21514 x = gen_rtx_AND (mode, cmp, op_true);
21515 emit_insn (gen_rtx_SET (dest, x));
21517 else if (op_true == CONST0_RTX (mode)
21518 && !maskcmp)
21520 op_false = force_reg (mode, op_false);
21521 x = gen_rtx_NOT (mode, cmp);
21522 x = gen_rtx_AND (mode, x, op_false);
21523 emit_insn (gen_rtx_SET (dest, x));
21525 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21526 && !maskcmp)
21528 op_false = force_reg (mode, op_false);
21529 x = gen_rtx_IOR (mode, cmp, op_false);
21530 emit_insn (gen_rtx_SET (dest, x));
21532 else if (TARGET_XOP
21533 && !maskcmp)
21535 op_true = force_reg (mode, op_true);
21537 if (!nonimmediate_operand (op_false, mode))
21538 op_false = force_reg (mode, op_false);
21540 emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp,
21541 op_true,
21542 op_false)));
21544 else
21546 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21547 rtx d = dest;
21549 if (!nonimmediate_operand (op_true, mode))
21550 op_true = force_reg (mode, op_true);
21552 op_false = force_reg (mode, op_false);
21554 switch (mode)
21556 case V4SFmode:
21557 if (TARGET_SSE4_1)
21558 gen = gen_sse4_1_blendvps;
21559 break;
21560 case V2DFmode:
21561 if (TARGET_SSE4_1)
21562 gen = gen_sse4_1_blendvpd;
21563 break;
21564 case V16QImode:
21565 case V8HImode:
21566 case V4SImode:
21567 case V2DImode:
21568 if (TARGET_SSE4_1)
21570 gen = gen_sse4_1_pblendvb;
21571 if (mode != V16QImode)
21572 d = gen_reg_rtx (V16QImode);
21573 op_false = gen_lowpart (V16QImode, op_false);
21574 op_true = gen_lowpart (V16QImode, op_true);
21575 cmp = gen_lowpart (V16QImode, cmp);
21577 break;
21578 case V8SFmode:
21579 if (TARGET_AVX)
21580 gen = gen_avx_blendvps256;
21581 break;
21582 case V4DFmode:
21583 if (TARGET_AVX)
21584 gen = gen_avx_blendvpd256;
21585 break;
21586 case V32QImode:
21587 case V16HImode:
21588 case V8SImode:
21589 case V4DImode:
21590 if (TARGET_AVX2)
21592 gen = gen_avx2_pblendvb;
21593 if (mode != V32QImode)
21594 d = gen_reg_rtx (V32QImode);
21595 op_false = gen_lowpart (V32QImode, op_false);
21596 op_true = gen_lowpart (V32QImode, op_true);
21597 cmp = gen_lowpart (V32QImode, cmp);
21599 break;
21601 case V64QImode:
21602 gen = gen_avx512bw_blendmv64qi;
21603 break;
21604 case V32HImode:
21605 gen = gen_avx512bw_blendmv32hi;
21606 break;
21607 case V16SImode:
21608 gen = gen_avx512f_blendmv16si;
21609 break;
21610 case V8DImode:
21611 gen = gen_avx512f_blendmv8di;
21612 break;
21613 case V8DFmode:
21614 gen = gen_avx512f_blendmv8df;
21615 break;
21616 case V16SFmode:
21617 gen = gen_avx512f_blendmv16sf;
21618 break;
21620 default:
21621 break;
21624 if (gen != NULL)
21626 emit_insn (gen (d, op_false, op_true, cmp));
21627 if (d != dest)
21628 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21630 else
21632 op_true = force_reg (mode, op_true);
21634 t2 = gen_reg_rtx (mode);
21635 if (optimize)
21636 t3 = gen_reg_rtx (mode);
21637 else
21638 t3 = dest;
21640 x = gen_rtx_AND (mode, op_true, cmp);
21641 emit_insn (gen_rtx_SET (t2, x));
21643 x = gen_rtx_NOT (mode, cmp);
21644 x = gen_rtx_AND (mode, x, op_false);
21645 emit_insn (gen_rtx_SET (t3, x));
21647 x = gen_rtx_IOR (mode, t3, t2);
21648 emit_insn (gen_rtx_SET (dest, x));
21653 /* Expand a floating-point conditional move. Return true if successful. */
21655 bool
21656 ix86_expand_fp_movcc (rtx operands[])
21658 machine_mode mode = GET_MODE (operands[0]);
21659 enum rtx_code code = GET_CODE (operands[1]);
21660 rtx tmp, compare_op;
21661 rtx op0 = XEXP (operands[1], 0);
21662 rtx op1 = XEXP (operands[1], 1);
21664 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21666 machine_mode cmode;
21668 /* Since we have no cmove for SSE registers, don't force bad register
21669 allocation just to gain access to it. Deny movcc when the
21670 comparison mode doesn't match the move mode. */
21671 cmode = GET_MODE (op0);
21672 if (cmode == VOIDmode)
21673 cmode = GET_MODE (op1);
21674 if (cmode != mode)
21675 return false;
21677 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21678 if (code == UNKNOWN)
21679 return false;
21681 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21682 operands[2], operands[3]))
21683 return true;
21685 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21686 operands[2], operands[3]);
21687 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21688 return true;
21691 if (GET_MODE (op0) == TImode
21692 || (GET_MODE (op0) == DImode
21693 && !TARGET_64BIT))
21694 return false;
21696 /* The floating point conditional move instructions don't directly
21697 support conditions resulting from a signed integer comparison. */
21699 compare_op = ix86_expand_compare (code, op0, op1);
21700 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21702 tmp = gen_reg_rtx (QImode);
21703 ix86_expand_setcc (tmp, code, op0, op1);
21705 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21708 emit_insn (gen_rtx_SET (operands[0],
21709 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21710 operands[2], operands[3])));
21712 return true;
21715 /* Expand a floating-point vector conditional move; a vcond operation
21716 rather than a movcc operation. */
21718 bool
21719 ix86_expand_fp_vcond (rtx operands[])
21721 enum rtx_code code = GET_CODE (operands[3]);
21722 rtx cmp;
21724 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21725 &operands[4], &operands[5]);
21726 if (code == UNKNOWN)
21728 rtx temp;
21729 switch (GET_CODE (operands[3]))
21731 case LTGT:
21732 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21733 operands[5], operands[0], operands[0]);
21734 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21735 operands[5], operands[1], operands[2]);
21736 code = AND;
21737 break;
21738 case UNEQ:
21739 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21740 operands[5], operands[0], operands[0]);
21741 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21742 operands[5], operands[1], operands[2]);
21743 code = IOR;
21744 break;
21745 default:
21746 gcc_unreachable ();
21748 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21749 OPTAB_DIRECT);
21750 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21751 return true;
21754 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21755 operands[5], operands[1], operands[2]))
21756 return true;
21758 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21759 operands[1], operands[2]);
21760 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21761 return true;
21764 /* Expand a signed/unsigned integral vector conditional move. */
21766 bool
21767 ix86_expand_int_vcond (rtx operands[])
21769 machine_mode data_mode = GET_MODE (operands[0]);
21770 machine_mode mode = GET_MODE (operands[4]);
21771 enum rtx_code code = GET_CODE (operands[3]);
21772 bool negate = false;
21773 rtx x, cop0, cop1;
21775 cop0 = operands[4];
21776 cop1 = operands[5];
21778 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21779 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
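/* E.g. for V4SImode this is a single psrad $31 (arithmetic shift) or
   psrld $31 (logical shift) respectively; a sketch, the actual insn
   depends on the element width.  */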
21780 if ((code == LT || code == GE)
21781 && data_mode == mode
21782 && cop1 == CONST0_RTX (mode)
21783 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21784 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21785 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21786 && (GET_MODE_SIZE (data_mode) == 16
21787 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21789 rtx negop = operands[2 - (code == LT)];
21790 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21791 if (negop == CONST1_RTX (data_mode))
21793 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21794 operands[0], 1, OPTAB_DIRECT);
21795 if (res != operands[0])
21796 emit_move_insn (operands[0], res);
21797 return true;
21799 else if (GET_MODE_INNER (data_mode) != DImode
21800 && vector_all_ones_operand (negop, data_mode))
21802 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21803 operands[0], 0, OPTAB_DIRECT);
21804 if (res != operands[0])
21805 emit_move_insn (operands[0], res);
21806 return true;
21810 if (!nonimmediate_operand (cop1, mode))
21811 cop1 = force_reg (mode, cop1);
21812 if (!general_operand (operands[1], data_mode))
21813 operands[1] = force_reg (data_mode, operands[1]);
21814 if (!general_operand (operands[2], data_mode))
21815 operands[2] = force_reg (data_mode, operands[2]);
21817 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21818 if (TARGET_XOP
21819 && (mode == V16QImode || mode == V8HImode
21820 || mode == V4SImode || mode == V2DImode))
21822 else
21824 /* Canonicalize the comparison to EQ, GT, GTU. */
21825 switch (code)
21827 case EQ:
21828 case GT:
21829 case GTU:
21830 break;
21832 case NE:
21833 case LE:
21834 case LEU:
21835 code = reverse_condition (code);
21836 negate = true;
21837 break;
21839 case GE:
21840 case GEU:
21841 code = reverse_condition (code);
21842 negate = true;
21843 /* FALLTHRU */
21845 case LT:
21846 case LTU:
21847 std::swap (cop0, cop1);
21848 code = swap_condition (code);
21849 break;
21851 default:
21852 gcc_unreachable ();
21855 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21856 if (mode == V2DImode)
21858 switch (code)
21860 case EQ:
21861 /* SSE4.1 supports EQ. */
21862 if (!TARGET_SSE4_1)
21863 return false;
21864 break;
21866 case GT:
21867 case GTU:
21868 /* SSE4.2 supports GT/GTU. */
21869 if (!TARGET_SSE4_2)
21870 return false;
21871 break;
21873 default:
21874 gcc_unreachable ();
21878 /* Unsigned parallel compare is not supported by the hardware.
21879 Play some tricks to turn this into a signed comparison
21880 against 0. */
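/* Sketch of the two tricks used below: for SImode/DImode elements,
   "a > b" (unsigned) is computed as the signed compare
     (a - 0x80000000...) > (b - 0x80000000...)
   after biasing both operands by the sign bit; for QImode/HImode
   elements it is computed via a saturating subtract,
     (a - b saturated at 0) != 0.  */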
21881 if (code == GTU)
21883 cop0 = force_reg (mode, cop0);
21885 switch (mode)
21887 case V16SImode:
21888 case V8DImode:
21889 case V8SImode:
21890 case V4DImode:
21891 case V4SImode:
21892 case V2DImode:
21894 rtx t1, t2, mask;
21895 rtx (*gen_sub3) (rtx, rtx, rtx);
21897 switch (mode)
21899 case V16SImode: gen_sub3 = gen_subv16si3; break;
21900 case V8DImode: gen_sub3 = gen_subv8di3; break;
21901 case V8SImode: gen_sub3 = gen_subv8si3; break;
21902 case V4DImode: gen_sub3 = gen_subv4di3; break;
21903 case V4SImode: gen_sub3 = gen_subv4si3; break;
21904 case V2DImode: gen_sub3 = gen_subv2di3; break;
21905 default:
21906 gcc_unreachable ();
21908 /* Subtract (-(INT MAX) - 1) from both operands to make
21909 them signed. */
21910 mask = ix86_build_signbit_mask (mode, true, false);
21911 t1 = gen_reg_rtx (mode);
21912 emit_insn (gen_sub3 (t1, cop0, mask));
21914 t2 = gen_reg_rtx (mode);
21915 emit_insn (gen_sub3 (t2, cop1, mask));
21917 cop0 = t1;
21918 cop1 = t2;
21919 code = GT;
21921 break;
21923 case V64QImode:
21924 case V32HImode:
21925 case V32QImode:
21926 case V16HImode:
21927 case V16QImode:
21928 case V8HImode:
21929 /* Perform a parallel unsigned saturating subtraction. */
21930 x = gen_reg_rtx (mode);
21931 emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, cop1)));
21933 cop0 = x;
21934 cop1 = CONST0_RTX (mode);
21935 code = EQ;
21936 negate = !negate;
21937 break;
21939 default:
21940 gcc_unreachable ();
21945 /* Allow the comparison to be done in one mode, but the movcc to
21946 happen in another mode. */
21947 if (data_mode == mode)
21949 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21950 operands[1+negate], operands[2-negate]);
21952 else
21954 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21955 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21956 operands[1+negate], operands[2-negate]);
21957 if (GET_MODE (x) == mode)
21958 x = gen_lowpart (data_mode, x);
21961 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21962 operands[2-negate]);
21963 return true;
21966 /* AVX512F does support 64-byte integer vector operations,
21967 thus the longest vector we are faced with is V64QImode. */
21968 #define MAX_VECT_LEN 64
21970 struct expand_vec_perm_d
21972 rtx target, op0, op1;
21973 unsigned char perm[MAX_VECT_LEN];
21974 machine_mode vmode;
21975 unsigned char nelt;
21976 bool one_operand_p;
21977 bool testing_p;
21980 static bool
21981 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21982 struct expand_vec_perm_d *d)
21984 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21985 expanders, so the arguments are either in D, or in OP0, OP1, etc. */
21986 machine_mode mode = GET_MODE (d ? d->op0 : op0);
21987 machine_mode maskmode = mode;
21988 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21990 switch (mode)
21992 case V8HImode:
21993 if (TARGET_AVX512VL && TARGET_AVX512BW)
21994 gen = gen_avx512vl_vpermi2varv8hi3;
21995 break;
21996 case V16HImode:
21997 if (TARGET_AVX512VL && TARGET_AVX512BW)
21998 gen = gen_avx512vl_vpermi2varv16hi3;
21999 break;
22000 case V64QImode:
22001 if (TARGET_AVX512VBMI)
22002 gen = gen_avx512bw_vpermi2varv64qi3;
22003 break;
22004 case V32HImode:
22005 if (TARGET_AVX512BW)
22006 gen = gen_avx512bw_vpermi2varv32hi3;
22007 break;
22008 case V4SImode:
22009 if (TARGET_AVX512VL)
22010 gen = gen_avx512vl_vpermi2varv4si3;
22011 break;
22012 case V8SImode:
22013 if (TARGET_AVX512VL)
22014 gen = gen_avx512vl_vpermi2varv8si3;
22015 break;
22016 case V16SImode:
22017 if (TARGET_AVX512F)
22018 gen = gen_avx512f_vpermi2varv16si3;
22019 break;
22020 case V4SFmode:
22021 if (TARGET_AVX512VL)
22023 gen = gen_avx512vl_vpermi2varv4sf3;
22024 maskmode = V4SImode;
22026 break;
22027 case V8SFmode:
22028 if (TARGET_AVX512VL)
22030 gen = gen_avx512vl_vpermi2varv8sf3;
22031 maskmode = V8SImode;
22033 break;
22034 case V16SFmode:
22035 if (TARGET_AVX512F)
22037 gen = gen_avx512f_vpermi2varv16sf3;
22038 maskmode = V16SImode;
22040 break;
22041 case V2DImode:
22042 if (TARGET_AVX512VL)
22043 gen = gen_avx512vl_vpermi2varv2di3;
22044 break;
22045 case V4DImode:
22046 if (TARGET_AVX512VL)
22047 gen = gen_avx512vl_vpermi2varv4di3;
22048 break;
22049 case V8DImode:
22050 if (TARGET_AVX512F)
22051 gen = gen_avx512f_vpermi2varv8di3;
22052 break;
22053 case V2DFmode:
22054 if (TARGET_AVX512VL)
22056 gen = gen_avx512vl_vpermi2varv2df3;
22057 maskmode = V2DImode;
22059 break;
22060 case V4DFmode:
22061 if (TARGET_AVX512VL)
22063 gen = gen_avx512vl_vpermi2varv4df3;
22064 maskmode = V4DImode;
22066 break;
22067 case V8DFmode:
22068 if (TARGET_AVX512F)
22070 gen = gen_avx512f_vpermi2varv8df3;
22071 maskmode = V8DImode;
22073 break;
22074 default:
22075 break;
22078 if (gen == NULL)
22079 return false;
22081 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
22082 expanders, so the arguments are either in D, or in OP0, OP1, etc. */
22083 if (d)
22085 rtx vec[64];
22086 target = d->target;
22087 op0 = d->op0;
22088 op1 = d->op1;
22089 for (int i = 0; i < d->nelt; ++i)
22090 vec[i] = GEN_INT (d->perm[i]);
22091 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
22094 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
22095 return true;
22098 /* Expand a variable vector permutation. */
22100 void
22101 ix86_expand_vec_perm (rtx operands[])
22103 rtx target = operands[0];
22104 rtx op0 = operands[1];
22105 rtx op1 = operands[2];
22106 rtx mask = operands[3];
22107 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22108 machine_mode mode = GET_MODE (op0);
22109 machine_mode maskmode = GET_MODE (mask);
22110 int w, e, i;
22111 bool one_operand_shuffle = rtx_equal_p (op0, op1);
22113 /* Number of elements in the vector. */
22114 w = GET_MODE_NUNITS (mode);
22115 e = GET_MODE_UNIT_SIZE (mode);
22116 gcc_assert (w <= 64);
22118 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22119 return;
22121 if (TARGET_AVX2)
22123 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22125 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
22126 a constant shuffle operand. With a tiny bit of effort we can
22127 use VPERMD instead. A re-interpretation stall for V4DFmode is
22128 unfortunate but there's no avoiding it.
22129 Similarly, for V16HImode we don't have instructions for variable
22130 shuffling, while for V32QImode we can, after preparing suitable
22131 masks, use vpshufb; vpshufb; vpermq; vpor. */
22133 if (mode == V16HImode)
22135 maskmode = mode = V32QImode;
22136 w = 32;
22137 e = 1;
22139 else
22141 maskmode = mode = V8SImode;
22142 w = 8;
22143 e = 4;
22145 t1 = gen_reg_rtx (maskmode);
22147 /* Replicate the low bits of the V4DImode mask into V8SImode:
22148 mask = { A B C D }
22149 t1 = { A A B B C C D D }. */
22150 for (i = 0; i < w / 2; ++i)
22151 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22152 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22153 vt = force_reg (maskmode, vt);
22154 mask = gen_lowpart (maskmode, mask);
22155 if (maskmode == V8SImode)
22156 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22157 else
22158 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22160 /* Multiply the shuffle indices by two. */
22161 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22162 OPTAB_DIRECT);
22164 /* Add one to the odd shuffle indices:
22165 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
22166 for (i = 0; i < w / 2; ++i)
22168 vec[i * 2] = const0_rtx;
22169 vec[i * 2 + 1] = const1_rtx;
22171 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22172 vt = validize_mem (force_const_mem (maskmode, vt));
22173 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22174 OPTAB_DIRECT);
22176 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
22177 operands[3] = mask = t1;
22178 target = gen_reg_rtx (mode);
22179 op0 = gen_lowpart (mode, op0);
22180 op1 = gen_lowpart (mode, op1);
22183 switch (mode)
22185 case V8SImode:
22186 /* The VPERMD and VPERMPS instructions already properly ignore
22187 the high bits of the shuffle elements. No need for us to
22188 perform an AND ourselves. */
22189 if (one_operand_shuffle)
22191 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22192 if (target != operands[0])
22193 emit_move_insn (operands[0],
22194 gen_lowpart (GET_MODE (operands[0]), target));
22196 else
22198 t1 = gen_reg_rtx (V8SImode);
22199 t2 = gen_reg_rtx (V8SImode);
22200 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22201 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22202 goto merge_two;
22204 return;
22206 case V8SFmode:
22207 mask = gen_lowpart (V8SImode, mask);
22208 if (one_operand_shuffle)
22209 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22210 else
22212 t1 = gen_reg_rtx (V8SFmode);
22213 t2 = gen_reg_rtx (V8SFmode);
22214 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22215 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22216 goto merge_two;
22218 return;
22220 case V4SImode:
22221 /* By combining the two 128-bit input vectors into one 256-bit
22222 input vector, we can use VPERMD and VPERMPS for the full
22223 two-operand shuffle. */
22224 t1 = gen_reg_rtx (V8SImode);
22225 t2 = gen_reg_rtx (V8SImode);
22226 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22227 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22228 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22229 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22230 return;
22232 case V4SFmode:
22233 t1 = gen_reg_rtx (V8SFmode);
22234 t2 = gen_reg_rtx (V8SImode);
22235 mask = gen_lowpart (V4SImode, mask);
22236 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22237 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22238 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22239 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22240 return;
22242 case V32QImode:
22243 t1 = gen_reg_rtx (V32QImode);
22244 t2 = gen_reg_rtx (V32QImode);
22245 t3 = gen_reg_rtx (V32QImode);
22246 vt2 = GEN_INT (-128);
22247 for (i = 0; i < 32; i++)
22248 vec[i] = vt2;
22249 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22250 vt = force_reg (V32QImode, vt);
22251 for (i = 0; i < 32; i++)
22252 vec[i] = i < 16 ? vt2 : const0_rtx;
22253 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22254 vt2 = force_reg (V32QImode, vt2);
22255 /* From mask create two adjusted masks, which contain the same
22256 bits as mask in the low 7 bits of each vector element.
22257 The first mask will have the most significant bit clear
22258 if it requests element from the same 128-bit lane
22259 and MSB set if it requests element from the other 128-bit lane.
22260 The second mask will have the opposite values of the MSB,
22261 and additionally will have its 128-bit lanes swapped.
22262 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22263 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
22264 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
22265 stands for the other 12 bytes. */
22266 /* The bit that says whether an element is from the same lane or the
22267 other lane is bit 4, so shift it up by 3 to the MSB position. */
22268 t5 = gen_reg_rtx (V4DImode);
22269 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22270 GEN_INT (3)));
22271 /* Clear MSB bits from the mask just in case it had them set. */
22272 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22273 /* After this t1 will have MSB set for elements from other lane. */
22274 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22275 /* Clear bits other than MSB. */
22276 emit_insn (gen_andv32qi3 (t1, t1, vt));
22277 /* Or in the lower bits from mask into t3. */
22278 emit_insn (gen_iorv32qi3 (t3, t1, t2));
22279 /* And invert MSB bits in t1, so MSB is set for elements from the same
22280 lane. */
22281 emit_insn (gen_xorv32qi3 (t1, t1, vt));
22282 /* Swap 128-bit lanes in t3. */
22283 t6 = gen_reg_rtx (V4DImode);
22284 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22285 const2_rtx, GEN_INT (3),
22286 const0_rtx, const1_rtx));
22287 /* And or in the lower bits from mask into t1. */
22288 emit_insn (gen_iorv32qi3 (t1, t1, t2));
22289 if (one_operand_shuffle)
22291 /* Each of these shuffles will put 0s in places where
22292 element from the other 128-bit lane is needed, otherwise
22293 will shuffle in the requested value. */
22294 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22295 gen_lowpart (V32QImode, t6)));
22296 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22297 /* For t3 the 128-bit lanes are swapped again. */
22298 t7 = gen_reg_rtx (V4DImode);
22299 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22300 const2_rtx, GEN_INT (3),
22301 const0_rtx, const1_rtx));
22302 /* And oring both together leads to the result. */
22303 emit_insn (gen_iorv32qi3 (target, t1,
22304 gen_lowpart (V32QImode, t7)));
22305 if (target != operands[0])
22306 emit_move_insn (operands[0],
22307 gen_lowpart (GET_MODE (operands[0]), target));
22308 return;
22311 t4 = gen_reg_rtx (V32QImode);
22312 /* Similar to the one_operand_shuffle code above, just
22313 repeated twice, once for each operand. The merge_two:
22314 code will merge the two results together. */
22315 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22316 gen_lowpart (V32QImode, t6)));
22317 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22318 gen_lowpart (V32QImode, t6)));
22319 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22320 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22321 t7 = gen_reg_rtx (V4DImode);
22322 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22323 const2_rtx, GEN_INT (3),
22324 const0_rtx, const1_rtx));
22325 t8 = gen_reg_rtx (V4DImode);
22326 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22327 const2_rtx, GEN_INT (3),
22328 const0_rtx, const1_rtx));
22329 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22330 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22331 t1 = t4;
22332 t2 = t3;
22333 goto merge_two;
22335 default:
22336 gcc_assert (GET_MODE_SIZE (mode) <= 16);
22337 break;
22341 if (TARGET_XOP)
22343 /* The XOP VPPERM insn supports three inputs. By ignoring the
22344 one_operand_shuffle special case, we avoid creating another
22345 set of constant vectors in memory. */
22346 one_operand_shuffle = false;
22348 /* mask = mask & {2*w-1, ...} */
22349 vt = GEN_INT (2*w - 1);
22351 else
22353 /* mask = mask & {w-1, ...} */
22354 vt = GEN_INT (w - 1);
22357 for (i = 0; i < w; i++)
22358 vec[i] = vt;
22359 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22360 mask = expand_simple_binop (maskmode, AND, mask, vt,
22361 NULL_RTX, 0, OPTAB_DIRECT);
22363 /* For non-QImode operations, convert the word permutation control
22364 into a byte permutation control. */
22365 if (mode != V16QImode)
22367 mask = expand_simple_binop (maskmode, ASHIFT, mask,
22368 GEN_INT (exact_log2 (e)),
22369 NULL_RTX, 0, OPTAB_DIRECT);
22371 /* Convert mask to vector of chars. */
22372 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22374 /* Replicate each of the input bytes into byte positions:
22375 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22376 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22377 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
22378 for (i = 0; i < 16; ++i)
22379 vec[i] = GEN_INT (i/e * e);
22380 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22381 vt = validize_mem (force_const_mem (V16QImode, vt));
22382 if (TARGET_XOP)
22383 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22384 else
22385 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22387 /* Convert it into the byte positions by doing
22388 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
22389 for (i = 0; i < 16; ++i)
22390 vec[i] = GEN_INT (i % e);
22391 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22392 vt = validize_mem (force_const_mem (V16QImode, vt));
22393 emit_insn (gen_addv16qi3 (mask, mask, vt));
22396 /* The actual shuffle operations all operate on V16QImode. */
22397 op0 = gen_lowpart (V16QImode, op0);
22398 op1 = gen_lowpart (V16QImode, op1);
22400 if (TARGET_XOP)
22402 if (GET_MODE (target) != V16QImode)
22403 target = gen_reg_rtx (V16QImode);
22404 emit_insn (gen_xop_pperm (target, op0, op1, mask));
22405 if (target != operands[0])
22406 emit_move_insn (operands[0],
22407 gen_lowpart (GET_MODE (operands[0]), target));
22409 else if (one_operand_shuffle)
22411 if (GET_MODE (target) != V16QImode)
22412 target = gen_reg_rtx (V16QImode);
22413 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22414 if (target != operands[0])
22415 emit_move_insn (operands[0],
22416 gen_lowpart (GET_MODE (operands[0]), target));
22418 else
22420 rtx xops[6];
22421 bool ok;
22423 /* Shuffle the two input vectors independently. */
22424 t1 = gen_reg_rtx (V16QImode);
22425 t2 = gen_reg_rtx (V16QImode);
22426 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22427 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22429 merge_two:
22430 /* Then merge them together. The key is whether any given control
22431 element contained a bit set that indicates the second word. */
22432 mask = operands[3];
22433 vt = GEN_INT (w);
22434 if (maskmode == V2DImode && !TARGET_SSE4_1)
22436 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
22437 more shuffle to convert the V2DI input mask into a V4SI
22438 input mask. At which point the masking that expand_int_vcond
22439 will work as desired. */
22440 rtx t3 = gen_reg_rtx (V4SImode);
22441 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22442 const0_rtx, const0_rtx,
22443 const2_rtx, const2_rtx));
22444 mask = t3;
22445 maskmode = V4SImode;
22446 e = w = 4;
22449 for (i = 0; i < w; i++)
22450 vec[i] = vt;
22451 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22452 vt = force_reg (maskmode, vt);
22453 mask = expand_simple_binop (maskmode, AND, mask, vt,
22454 NULL_RTX, 0, OPTAB_DIRECT);
22456 if (GET_MODE (target) != mode)
22457 target = gen_reg_rtx (mode);
22458 xops[0] = target;
22459 xops[1] = gen_lowpart (mode, t2);
22460 xops[2] = gen_lowpart (mode, t1);
22461 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22462 xops[4] = mask;
22463 xops[5] = vt;
22464 ok = ix86_expand_int_vcond (xops);
22465 gcc_assert (ok);
22466 if (target != operands[0])
22467 emit_move_insn (operands[0],
22468 gen_lowpart (GET_MODE (operands[0]), target));
22472 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
22473 true if we should do zero extension, else sign extension. HIGH_P is
22474 true if we want the N/2 high elements, else the low elements. */
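/* For example, unpacking the low half of a V8HImode SRC with UNSIGNED_P
   set yields the V4SImode zero extension of elements 0..3 (pmovzxwd on
   SSE4.1, otherwise punpcklwd against a zero vector); an illustrative
   case only.  */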
22476 void
22477 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22479 machine_mode imode = GET_MODE (src);
22480 rtx tmp;
22482 if (TARGET_SSE4_1)
22484 rtx (*unpack)(rtx, rtx);
22485 rtx (*extract)(rtx, rtx) = NULL;
22486 machine_mode halfmode = BLKmode;
22488 switch (imode)
22490 case V64QImode:
22491 if (unsigned_p)
22492 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22493 else
22494 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22495 halfmode = V32QImode;
22496 extract
22497 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22498 break;
22499 case V32QImode:
22500 if (unsigned_p)
22501 unpack = gen_avx2_zero_extendv16qiv16hi2;
22502 else
22503 unpack = gen_avx2_sign_extendv16qiv16hi2;
22504 halfmode = V16QImode;
22505 extract
22506 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22507 break;
22508 case V32HImode:
22509 if (unsigned_p)
22510 unpack = gen_avx512f_zero_extendv16hiv16si2;
22511 else
22512 unpack = gen_avx512f_sign_extendv16hiv16si2;
22513 halfmode = V16HImode;
22514 extract
22515 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22516 break;
22517 case V16HImode:
22518 if (unsigned_p)
22519 unpack = gen_avx2_zero_extendv8hiv8si2;
22520 else
22521 unpack = gen_avx2_sign_extendv8hiv8si2;
22522 halfmode = V8HImode;
22523 extract
22524 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22525 break;
22526 case V16SImode:
22527 if (unsigned_p)
22528 unpack = gen_avx512f_zero_extendv8siv8di2;
22529 else
22530 unpack = gen_avx512f_sign_extendv8siv8di2;
22531 halfmode = V8SImode;
22532 extract
22533 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22534 break;
22535 case V8SImode:
22536 if (unsigned_p)
22537 unpack = gen_avx2_zero_extendv4siv4di2;
22538 else
22539 unpack = gen_avx2_sign_extendv4siv4di2;
22540 halfmode = V4SImode;
22541 extract
22542 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22543 break;
22544 case V16QImode:
22545 if (unsigned_p)
22546 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22547 else
22548 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22549 break;
22550 case V8HImode:
22551 if (unsigned_p)
22552 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22553 else
22554 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22555 break;
22556 case V4SImode:
22557 if (unsigned_p)
22558 unpack = gen_sse4_1_zero_extendv2siv2di2;
22559 else
22560 unpack = gen_sse4_1_sign_extendv2siv2di2;
22561 break;
22562 default:
22563 gcc_unreachable ();
22566 if (GET_MODE_SIZE (imode) >= 32)
22568 tmp = gen_reg_rtx (halfmode);
22569 emit_insn (extract (tmp, src));
22571 else if (high_p)
22573 /* Shift higher 8 bytes to lower 8 bytes. */
22574 tmp = gen_reg_rtx (V1TImode);
22575 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22576 GEN_INT (64)));
22577 tmp = gen_lowpart (imode, tmp);
22579 else
22580 tmp = src;
22582 emit_insn (unpack (dest, tmp));
22584 else
22586 rtx (*unpack)(rtx, rtx, rtx);
22588 switch (imode)
22590 case V16QImode:
22591 if (high_p)
22592 unpack = gen_vec_interleave_highv16qi;
22593 else
22594 unpack = gen_vec_interleave_lowv16qi;
22595 break;
22596 case V8HImode:
22597 if (high_p)
22598 unpack = gen_vec_interleave_highv8hi;
22599 else
22600 unpack = gen_vec_interleave_lowv8hi;
22601 break;
22602 case V4SImode:
22603 if (high_p)
22604 unpack = gen_vec_interleave_highv4si;
22605 else
22606 unpack = gen_vec_interleave_lowv4si;
22607 break;
22608 default:
22609 gcc_unreachable ();
22612 if (unsigned_p)
22613 tmp = force_reg (imode, CONST0_RTX (imode));
22614 else
22615 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22616 src, pc_rtx, pc_rtx);
22618 rtx tmp2 = gen_reg_rtx (imode);
22619 emit_insn (unpack (tmp2, src, tmp));
22620 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22624 /* Expand conditional increment or decrement using adc/sbb instructions.
22625 The default case using setcc followed by the conditional move can be
22626 done by generic code. */
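/* E.g. "x += (a < b)" with an unsigned comparison can come out as
      cmpl b, a
      adcl $0, x
   and "x -= (a < b)" as the matching "sbbl $0, x"; an illustrative
   sketch of the intended output only.  */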
22627 bool
22628 ix86_expand_int_addcc (rtx operands[])
22630 enum rtx_code code = GET_CODE (operands[1]);
22631 rtx flags;
22632 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22633 rtx compare_op;
22634 rtx val = const0_rtx;
22635 bool fpcmp = false;
22636 machine_mode mode;
22637 rtx op0 = XEXP (operands[1], 0);
22638 rtx op1 = XEXP (operands[1], 1);
22640 if (operands[3] != const1_rtx
22641 && operands[3] != constm1_rtx)
22642 return false;
22643 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22644 return false;
22645 code = GET_CODE (compare_op);
22647 flags = XEXP (compare_op, 0);
22649 if (GET_MODE (flags) == CCFPmode
22650 || GET_MODE (flags) == CCFPUmode)
22652 fpcmp = true;
22653 code = ix86_fp_compare_code_to_integer (code);
22656 if (code != LTU)
22658 val = constm1_rtx;
22659 if (fpcmp)
22660 PUT_CODE (compare_op,
22661 reverse_condition_maybe_unordered
22662 (GET_CODE (compare_op)));
22663 else
22664 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22667 mode = GET_MODE (operands[0]);
22669 /* Construct either adc or sbb insn. */
22670 if ((code == LTU) == (operands[3] == constm1_rtx))
22672 switch (mode)
22674 case QImode:
22675 insn = gen_subqi3_carry;
22676 break;
22677 case HImode:
22678 insn = gen_subhi3_carry;
22679 break;
22680 case SImode:
22681 insn = gen_subsi3_carry;
22682 break;
22683 case DImode:
22684 insn = gen_subdi3_carry;
22685 break;
22686 default:
22687 gcc_unreachable ();
22690 else
22692 switch (mode)
22694 case QImode:
22695 insn = gen_addqi3_carry;
22696 break;
22697 case HImode:
22698 insn = gen_addhi3_carry;
22699 break;
22700 case SImode:
22701 insn = gen_addsi3_carry;
22702 break;
22703 case DImode:
22704 insn = gen_adddi3_carry;
22705 break;
22706 default:
22707 gcc_unreachable ();
22710 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22712 return true;
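/* Illustrative example (editorial addition, not part of the original
   sources): for source code such as

     x += (a < b);   (operands[3] == const1_rtx)

   the expander above lets the unsigned compare set the carry flag and
   then emits a single add-with-carry of zero into X, instead of a
   setcc followed by an add; a conditional decrement (operands[3] ==
   constm1_rtx) uses sbb in the same way.  */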
22716 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22717 but works for floating point parameters and non-offsettable memories.
22718 For pushes, it returns just stack offsets; the values will be saved
22719 in the right order. Maximally four parts are generated. */
22721 static int
22722 ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22724 int size;
22726 if (!TARGET_64BIT)
22727 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22728 else
22729 size = (GET_MODE_SIZE (mode) + 4) / 8;
22731 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22732 gcc_assert (size >= 2 && size <= 4);
22734 /* Optimize constant pool reference to immediates. This is used by fp
22735 moves, that force all constants to memory to allow combining. */
22736 if (MEM_P (operand) && MEM_READONLY_P (operand))
22738 rtx tmp = maybe_get_pool_constant (operand);
22739 if (tmp)
22740 operand = tmp;
22743 if (MEM_P (operand) && !offsettable_memref_p (operand))
22745 /* The only non-offsettable memories we handle are pushes. */
22746 int ok = push_operand (operand, VOIDmode);
22748 gcc_assert (ok);
22750 operand = copy_rtx (operand);
22751 PUT_MODE (operand, word_mode);
22752 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22753 return size;
22756 if (GET_CODE (operand) == CONST_VECTOR)
22758 machine_mode imode = int_mode_for_mode (mode);
22759 /* Caution: if we looked through a constant pool memory above,
22760 the operand may actually have a different mode now. That's
22761 ok, since we want to pun this all the way back to an integer. */
22762 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22763 gcc_assert (operand != NULL);
22764 mode = imode;
22767 if (!TARGET_64BIT)
22769 if (mode == DImode)
22770 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22771 else
22773 int i;
22775 if (REG_P (operand))
22777 gcc_assert (reload_completed);
22778 for (i = 0; i < size; i++)
22779 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22781 else if (offsettable_memref_p (operand))
22783 operand = adjust_address (operand, SImode, 0);
22784 parts[0] = operand;
22785 for (i = 1; i < size; i++)
22786 parts[i] = adjust_address (operand, SImode, 4 * i);
22788 else if (CONST_DOUBLE_P (operand))
22790 REAL_VALUE_TYPE r;
22791 long l[4];
22793 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22794 switch (mode)
22796 case TFmode:
22797 real_to_target (l, &r, mode);
22798 parts[3] = gen_int_mode (l[3], SImode);
22799 parts[2] = gen_int_mode (l[2], SImode);
22800 break;
22801 case XFmode:
22802 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22803 long double may not be 80-bit. */
22804 real_to_target (l, &r, mode);
22805 parts[2] = gen_int_mode (l[2], SImode);
22806 break;
22807 case DFmode:
22808 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22809 break;
22810 default:
22811 gcc_unreachable ();
22813 parts[1] = gen_int_mode (l[1], SImode);
22814 parts[0] = gen_int_mode (l[0], SImode);
22816 else
22817 gcc_unreachable ();
22820 else
22822 if (mode == TImode)
22823 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22824 if (mode == XFmode || mode == TFmode)
22826 machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22827 if (REG_P (operand))
22829 gcc_assert (reload_completed);
22830 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22831 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22833 else if (offsettable_memref_p (operand))
22835 operand = adjust_address (operand, DImode, 0);
22836 parts[0] = operand;
22837 parts[1] = adjust_address (operand, upper_mode, 8);
22839 else if (CONST_DOUBLE_P (operand))
22841 REAL_VALUE_TYPE r;
22842 long l[4];
22844 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22845 real_to_target (l, &r, mode);
22847 /* real_to_target puts 32-bit pieces in each long. */
22848 parts[0] =
22849 gen_int_mode
22850 ((l[0] & (HOST_WIDE_INT) 0xffffffff)
22851 | ((l[1] & (HOST_WIDE_INT) 0xffffffff) << 32),
22852 DImode);
22854 if (upper_mode == SImode)
22855 parts[1] = gen_int_mode (l[2], SImode);
22856 else
22857 parts[1] =
22858 gen_int_mode
22859 ((l[2] & (HOST_WIDE_INT) 0xffffffff)
22860 | ((l[3] & (HOST_WIDE_INT) 0xffffffff) << 32),
22861 DImode);
22863 else
22864 gcc_unreachable ();
22868 return size;
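/* Worked example (editorial addition): on a 32-bit target a DFmode
   constant such as 1.0, whose IEEE bit pattern is 0x3ff0000000000000,
   is split by the routine above into two SImode immediates,
   parts[0] = 0x00000000 (low word) and parts[1] = 0x3ff00000 (high
   word); an XFmode operand yields three SImode parts and a TFmode
   operand four.  */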
22871 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22872 The value is split into up to four half-mode parts; operands 2-5
22873 receive the destination parts and operands 6-9 the source parts
22874 in the correct order, and the part-by-part moves are emitted here. */
22876 void
22877 ix86_split_long_move (rtx operands[])
22879 rtx part[2][4];
22880 int nparts, i, j;
22881 int push = 0;
22882 int collisions = 0;
22883 machine_mode mode = GET_MODE (operands[0]);
22884 bool collisionparts[4];
22886 /* The DFmode expanders may ask us to move a double.
22887 For a 64-bit target this is a single move. By hiding that fact
22888 here we simplify the i386.md splitters. */
22889 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22891 /* Optimize constant pool reference to immediates. This is used by
22892 fp moves, that force all constants to memory to allow combining. */
22894 if (MEM_P (operands[1])
22895 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22896 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22897 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22898 if (push_operand (operands[0], VOIDmode))
22900 operands[0] = copy_rtx (operands[0]);
22901 PUT_MODE (operands[0], word_mode);
22903 else
22904 operands[0] = gen_lowpart (DImode, operands[0]);
22905 operands[1] = gen_lowpart (DImode, operands[1]);
22906 emit_move_insn (operands[0], operands[1]);
22907 return;
22910 /* The only non-offsettable memory we handle is push. */
22911 if (push_operand (operands[0], VOIDmode))
22912 push = 1;
22913 else
22914 gcc_assert (!MEM_P (operands[0])
22915 || offsettable_memref_p (operands[0]));
22917 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22918 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22920 /* When emitting push, take care for source operands on the stack. */
22921 if (push && MEM_P (operands[1])
22922 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22924 rtx src_base = XEXP (part[1][nparts - 1], 0);
22926 /* Compensate for the stack decrement by 4. */
22927 if (!TARGET_64BIT && nparts == 3
22928 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22929 src_base = plus_constant (Pmode, src_base, 4);
22931 /* src_base refers to the stack pointer and is
22932 automatically decreased by the emitted pushes. */
22933 for (i = 0; i < nparts; i++)
22934 part[1][i] = change_address (part[1][i],
22935 GET_MODE (part[1][i]), src_base);
22938 /* We need to do copy in the right order in case an address register
22939 of the source overlaps the destination. */
22940 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22942 rtx tmp;
22944 for (i = 0; i < nparts; i++)
22946 collisionparts[i]
22947 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22948 if (collisionparts[i])
22949 collisions++;
22952 /* Collision in the middle part can be handled by reordering. */
22953 if (collisions == 1 && nparts == 3 && collisionparts [1])
22955 std::swap (part[0][1], part[0][2]);
22956 std::swap (part[1][1], part[1][2]);
22958 else if (collisions == 1
22959 && nparts == 4
22960 && (collisionparts [1] || collisionparts [2]))
22962 if (collisionparts [1])
22964 std::swap (part[0][1], part[0][2]);
22965 std::swap (part[1][1], part[1][2]);
22967 else
22969 std::swap (part[0][2], part[0][3]);
22970 std::swap (part[1][2], part[1][3]);
22974 /* If there are more collisions, we can't handle it by reordering.
22975 Do an lea to the last part and use only one colliding move. */
22976 else if (collisions > 1)
22978 rtx base, addr, tls_base = NULL_RTX;
22980 collisions = 1;
22982 base = part[0][nparts - 1];
22984 /* Handle the case when the last part isn't valid for lea.
22985 Happens in 64-bit mode storing the 12-byte XFmode. */
22986 if (GET_MODE (base) != Pmode)
22987 base = gen_rtx_REG (Pmode, REGNO (base));
22989 addr = XEXP (part[1][0], 0);
22990 if (TARGET_TLS_DIRECT_SEG_REFS)
22992 struct ix86_address parts;
22993 int ok = ix86_decompose_address (addr, &parts);
22994 gcc_assert (ok);
22995 if (parts.seg == DEFAULT_TLS_SEG_REG)
22997 /* It is not valid to use %gs: or %fs: in
22998 lea though, so we need to remove it from the
22999 address used for lea and add it to each individual
23000 memory load instead. */
23001 addr = copy_rtx (addr);
23002 rtx *x = &addr;
23003 while (GET_CODE (*x) == PLUS)
23005 for (i = 0; i < 2; i++)
23007 rtx u = XEXP (*x, i);
23008 if (GET_CODE (u) == ZERO_EXTEND)
23009 u = XEXP (u, 0);
23010 if (GET_CODE (u) == UNSPEC
23011 && XINT (u, 1) == UNSPEC_TP)
23013 tls_base = XEXP (*x, i);
23014 *x = XEXP (*x, 1 - i);
23015 break;
23018 if (tls_base)
23019 break;
23020 x = &XEXP (*x, 0);
23022 gcc_assert (tls_base);
23025 emit_insn (gen_rtx_SET (base, addr));
23026 if (tls_base)
23027 base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
23028 part[1][0] = replace_equiv_address (part[1][0], base);
23029 for (i = 1; i < nparts; i++)
23031 if (tls_base)
23032 base = copy_rtx (base);
23033 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
23034 part[1][i] = replace_equiv_address (part[1][i], tmp);
23039 if (push)
23041 if (!TARGET_64BIT)
23043 if (nparts == 3)
23045 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
23046 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
23047 stack_pointer_rtx, GEN_INT (-4)));
23048 emit_move_insn (part[0][2], part[1][2]);
23050 else if (nparts == 4)
23052 emit_move_insn (part[0][3], part[1][3]);
23053 emit_move_insn (part[0][2], part[1][2]);
23056 else
23058 /* In 64-bit mode we don't have a 32-bit push available. If this is a
23059 register, that is OK - we will just use the larger counterpart. We also
23060 retype memory - these come from an attempt to avoid a REX prefix on
23061 moving the second half of a TFmode value. */
23062 if (GET_MODE (part[1][1]) == SImode)
23064 switch (GET_CODE (part[1][1]))
23066 case MEM:
23067 part[1][1] = adjust_address (part[1][1], DImode, 0);
23068 break;
23070 case REG:
23071 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23072 break;
23074 default:
23075 gcc_unreachable ();
23078 if (GET_MODE (part[1][0]) == SImode)
23079 part[1][0] = part[1][1];
23082 emit_move_insn (part[0][1], part[1][1]);
23083 emit_move_insn (part[0][0], part[1][0]);
23084 return;
23087 /* Choose correct order to not overwrite the source before it is copied. */
23088 if ((REG_P (part[0][0])
23089 && REG_P (part[1][1])
23090 && (REGNO (part[0][0]) == REGNO (part[1][1])
23091 || (nparts == 3
23092 && REGNO (part[0][0]) == REGNO (part[1][2]))
23093 || (nparts == 4
23094 && REGNO (part[0][0]) == REGNO (part[1][3]))))
23095 || (collisions > 0
23096 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
23098 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23100 operands[2 + i] = part[0][j];
23101 operands[6 + i] = part[1][j];
23104 else
23106 for (i = 0; i < nparts; i++)
23108 operands[2 + i] = part[0][i];
23109 operands[6 + i] = part[1][i];
23113 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
23114 if (optimize_insn_for_size_p ())
23116 for (j = 0; j < nparts - 1; j++)
23117 if (CONST_INT_P (operands[6 + j])
23118 && operands[6 + j] != const0_rtx
23119 && REG_P (operands[2 + j]))
23120 for (i = j; i < nparts - 1; i++)
23121 if (CONST_INT_P (operands[7 + i])
23122 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23123 operands[7 + i] = operands[2 + j];
23126 for (i = 0; i < nparts; i++)
23127 emit_move_insn (operands[2 + i], operands[6 + i]);
23129 return;
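/* Illustrative example (editorial addition, not part of the original
   sources): a plain DImode copy on a 32-bit target, e.g.

     long long f (const long long *p) { return *p; }

   is split above into two SImode loads into the %eax/%edx return pair.
   When a destination register is also the address register of the
   source memory (a "collision"), the parts are emitted in reverse
   order so the address remains valid for the remaining load.  */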
23132 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
23133 left shift by a constant, either using a single shift or
23134 a sequence of add instructions. */
23136 static void
23137 ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23139 rtx (*insn)(rtx, rtx, rtx);
23141 if (count == 1
23142 || (count * ix86_cost->add <= ix86_cost->shift_const
23143 && !optimize_insn_for_size_p ()))
23145 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23146 while (count-- > 0)
23147 emit_insn (insn (operand, operand, operand));
23149 else
23151 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23152 emit_insn (insn (operand, operand, GEN_INT (count)));
23156 void
23157 ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23159 rtx (*gen_ashl3)(rtx, rtx, rtx);
23160 rtx (*gen_shld)(rtx, rtx, rtx);
23161 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23163 rtx low[2], high[2];
23164 int count;
23166 if (CONST_INT_P (operands[2]))
23168 split_double_mode (mode, operands, 2, low, high);
23169 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23171 if (count >= half_width)
23173 emit_move_insn (high[0], low[1]);
23174 emit_move_insn (low[0], const0_rtx);
23176 if (count > half_width)
23177 ix86_expand_ashl_const (high[0], count - half_width, mode);
23179 else
23181 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23183 if (!rtx_equal_p (operands[0], operands[1]))
23184 emit_move_insn (operands[0], operands[1]);
23186 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23187 ix86_expand_ashl_const (low[0], count, mode);
23189 return;
23192 split_double_mode (mode, operands, 1, low, high);
23194 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23196 if (operands[1] == const1_rtx)
23198 /* Assuming we've chosen QImode-capable registers, 1 << N
23199 can be done with two 32/64-bit shifts, no branches, no cmoves. */
23200 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23202 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23204 ix86_expand_clear (low[0]);
23205 ix86_expand_clear (high[0]);
23206 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23208 d = gen_lowpart (QImode, low[0]);
23209 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23210 s = gen_rtx_EQ (QImode, flags, const0_rtx);
23211 emit_insn (gen_rtx_SET (d, s));
23213 d = gen_lowpart (QImode, high[0]);
23214 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23215 s = gen_rtx_NE (QImode, flags, const0_rtx);
23216 emit_insn (gen_rtx_SET (d, s));
23219 /* Otherwise, we can get the same results by manually performing
23220 a bit extract operation on bit 5/6, and then performing the two
23221 shifts. The two methods of getting 0/1 into low/high are exactly
23222 the same size. Avoiding the shift in the bit extract case helps
23223 pentium4 a bit; no one else seems to care much either way. */
23224 else
23226 machine_mode half_mode;
23227 rtx (*gen_lshr3)(rtx, rtx, rtx);
23228 rtx (*gen_and3)(rtx, rtx, rtx);
23229 rtx (*gen_xor3)(rtx, rtx, rtx);
23230 HOST_WIDE_INT bits;
23231 rtx x;
23233 if (mode == DImode)
23235 half_mode = SImode;
23236 gen_lshr3 = gen_lshrsi3;
23237 gen_and3 = gen_andsi3;
23238 gen_xor3 = gen_xorsi3;
23239 bits = 5;
23241 else
23243 half_mode = DImode;
23244 gen_lshr3 = gen_lshrdi3;
23245 gen_and3 = gen_anddi3;
23246 gen_xor3 = gen_xordi3;
23247 bits = 6;
23250 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23251 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23252 else
23253 x = gen_lowpart (half_mode, operands[2]);
23254 emit_insn (gen_rtx_SET (high[0], x));
23256 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23257 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23258 emit_move_insn (low[0], high[0]);
23259 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23262 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23263 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23264 return;
23267 if (operands[1] == constm1_rtx)
23269 /* For -1 << N, we can avoid the shld instruction, because we
23270 know that we're shifting 0...31/63 ones into a -1. */
23271 emit_move_insn (low[0], constm1_rtx);
23272 if (optimize_insn_for_size_p ())
23273 emit_move_insn (high[0], low[0]);
23274 else
23275 emit_move_insn (high[0], constm1_rtx);
23277 else
23279 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23281 if (!rtx_equal_p (operands[0], operands[1]))
23282 emit_move_insn (operands[0], operands[1]);
23284 split_double_mode (mode, operands, 1, low, high);
23285 emit_insn (gen_shld (high[0], low[0], operands[2]));
23288 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23290 if (TARGET_CMOVE && scratch)
23292 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23293 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23295 ix86_expand_clear (scratch);
23296 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23298 else
23300 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23301 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23303 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
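/* Worked example (editorial addition) for the constant-count path
   above: shifting the DImode pair high:low left by 5 on a 32-bit
   target becomes a shld of 5 from the low into the high word followed
   by a 5-bit left shift of the low word; shifting by 40 instead moves
   the low word into the high word, clears the low word and then
   shifts the high word left by the remaining 8 bits.  */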
23307 void
23308 ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23310 rtx (*gen_ashr3)(rtx, rtx, rtx)
23311 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23312 rtx (*gen_shrd)(rtx, rtx, rtx);
23313 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23315 rtx low[2], high[2];
23316 int count;
23318 if (CONST_INT_P (operands[2]))
23320 split_double_mode (mode, operands, 2, low, high);
23321 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23323 if (count == GET_MODE_BITSIZE (mode) - 1)
23325 emit_move_insn (high[0], high[1]);
23326 emit_insn (gen_ashr3 (high[0], high[0],
23327 GEN_INT (half_width - 1)));
23328 emit_move_insn (low[0], high[0]);
23331 else if (count >= half_width)
23333 emit_move_insn (low[0], high[1]);
23334 emit_move_insn (high[0], low[0]);
23335 emit_insn (gen_ashr3 (high[0], high[0],
23336 GEN_INT (half_width - 1)));
23338 if (count > half_width)
23339 emit_insn (gen_ashr3 (low[0], low[0],
23340 GEN_INT (count - half_width)));
23342 else
23344 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23346 if (!rtx_equal_p (operands[0], operands[1]))
23347 emit_move_insn (operands[0], operands[1]);
23349 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23350 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23353 else
23355 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23357 if (!rtx_equal_p (operands[0], operands[1]))
23358 emit_move_insn (operands[0], operands[1]);
23360 split_double_mode (mode, operands, 1, low, high);
23362 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23363 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23365 if (TARGET_CMOVE && scratch)
23367 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23368 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23370 emit_move_insn (scratch, high[0]);
23371 emit_insn (gen_ashr3 (scratch, scratch,
23372 GEN_INT (half_width - 1)));
23373 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23374 scratch));
23376 else
23378 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23379 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23381 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
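/* Worked example (editorial addition) for the special case above: an
   arithmetic right shift of a DImode pair by 63 on a 32-bit target
   needs no shrd at all; the high word is copied, arithmetically
   shifted by 31 so it becomes 0 or -1, and then copied into the low
   word, smearing the sign bit across both halves.  */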
23386 void
23387 ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23389 rtx (*gen_lshr3)(rtx, rtx, rtx)
23390 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23391 rtx (*gen_shrd)(rtx, rtx, rtx);
23392 int half_width = GET_MODE_BITSIZE (mode) >> 1;
23394 rtx low[2], high[2];
23395 int count;
23397 if (CONST_INT_P (operands[2]))
23399 split_double_mode (mode, operands, 2, low, high);
23400 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23402 if (count >= half_width)
23404 emit_move_insn (low[0], high[1]);
23405 ix86_expand_clear (high[0]);
23407 if (count > half_width)
23408 emit_insn (gen_lshr3 (low[0], low[0],
23409 GEN_INT (count - half_width)));
23411 else
23413 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23415 if (!rtx_equal_p (operands[0], operands[1]))
23416 emit_move_insn (operands[0], operands[1]);
23418 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23419 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23422 else
23424 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23426 if (!rtx_equal_p (operands[0], operands[1]))
23427 emit_move_insn (operands[0], operands[1]);
23429 split_double_mode (mode, operands, 1, low, high);
23431 emit_insn (gen_shrd (low[0], high[0], operands[2]));
23432 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23434 if (TARGET_CMOVE && scratch)
23436 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23437 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23439 ix86_expand_clear (scratch);
23440 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23441 scratch));
23443 else
23445 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23446 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23448 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23453 /* Predict just emitted jump instruction to be taken with probability PROB. */
23454 static void
23455 predict_jump (int prob)
23457 rtx insn = get_last_insn ();
23458 gcc_assert (JUMP_P (insn));
23459 add_int_reg_note (insn, REG_BR_PROB, prob);
23462 /* Helper function for the string operations below. Test VARIABLE for whether
23463 it is aligned to VALUE bytes. If it is, jump to the label. */
23464 static rtx_code_label *
23465 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23467 rtx_code_label *label = gen_label_rtx ();
23468 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23469 if (GET_MODE (variable) == DImode)
23470 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23471 else
23472 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23473 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23474 1, label);
23475 if (epilogue)
23476 predict_jump (REG_BR_PROB_BASE * 50 / 100);
23477 else
23478 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23479 return label;
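/* Illustrative example (editorial addition, not part of the original
   sources): ix86_expand_aligntest (count, 4, true) emits code roughly
   equivalent to "test $4, count ; je .Lskip" (an and into a scratch
   register plus a compare-and-jump before combine runs), and returns
   the label so the caller can place the conditional 4-byte copy
   before emitting .Lskip.  */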
23482 /* Decrease COUNTREG by VALUE. */
23483 static void
23484 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23486 rtx (*gen_add)(rtx, rtx, rtx)
23487 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23489 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23492 /* Zero extend possibly SImode EXP to Pmode register. */
23494 ix86_zero_extend_to_Pmode (rtx exp)
23496 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23499 /* Divide COUNTREG by SCALE. */
23500 static rtx
23501 scale_counter (rtx countreg, int scale)
23503 rtx sc;
23505 if (scale == 1)
23506 return countreg;
23507 if (CONST_INT_P (countreg))
23508 return GEN_INT (INTVAL (countreg) / scale);
23509 gcc_assert (REG_P (countreg));
23511 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23512 GEN_INT (exact_log2 (scale)),
23513 NULL, 1, OPTAB_DIRECT);
23514 return sc;
23517 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
23518 DImode for constant loop counts. */
23520 static machine_mode
23521 counter_mode (rtx count_exp)
23523 if (GET_MODE (count_exp) != VOIDmode)
23524 return GET_MODE (count_exp);
23525 if (!CONST_INT_P (count_exp))
23526 return Pmode;
23527 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23528 return DImode;
23529 return SImode;
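/* Worked example (editorial addition): a count expression that already
   carries a machine mode keeps it; any other non-constant count gets
   Pmode; a constant such as 1000 gets SImode, and only a constant with
   bits above the low 32 (e.g. 0x100000001) on a 64-bit target gets
   DImode.  */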
23532 /* Copy the address to a Pmode register. This is used for x32 to
23533 truncate DImode TLS address to a SImode register. */
23535 static rtx
23536 ix86_copy_addr_to_reg (rtx addr)
23538 rtx reg;
23539 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23541 reg = copy_addr_to_reg (addr);
23542 REG_POINTER (reg) = 1;
23543 return reg;
23545 else
23547 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23548 reg = copy_to_mode_reg (DImode, addr);
23549 REG_POINTER (reg) = 1;
23550 return gen_rtx_SUBREG (SImode, reg, 0);
23554 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
23555 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size
23556 is COUNT, specified in bytes. When ISSETMEM is TRUE, output the equivalent
23557 loop to set memory by VALUE (supposed to be in MODE).
23559 The size is rounded down to a whole number of chunks moved at once.
23560 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
23563 static void
23564 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23565 rtx destptr, rtx srcptr, rtx value,
23566 rtx count, machine_mode mode, int unroll,
23567 int expected_size, bool issetmem)
23569 rtx_code_label *out_label, *top_label;
23570 rtx iter, tmp;
23571 machine_mode iter_mode = counter_mode (count);
23572 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23573 rtx piece_size = GEN_INT (piece_size_n);
23574 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23575 rtx size;
23576 int i;
23578 top_label = gen_label_rtx ();
23579 out_label = gen_label_rtx ();
23580 iter = gen_reg_rtx (iter_mode);
23582 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23583 NULL, 1, OPTAB_DIRECT);
23584 /* Those two should combine. */
23585 if (piece_size == const1_rtx)
23587 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23588 true, out_label);
23589 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23591 emit_move_insn (iter, const0_rtx);
23593 emit_label (top_label);
23595 tmp = convert_modes (Pmode, iter_mode, iter, true);
23597 /* This assert could be relaxed - in that case we'll need to compute
23598 the smallest power of two containing PIECE_SIZE_N and pass it to
23599 offset_address. */
23600 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23601 destmem = offset_address (destmem, tmp, piece_size_n);
23602 destmem = adjust_address (destmem, mode, 0);
23604 if (!issetmem)
23606 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23607 srcmem = adjust_address (srcmem, mode, 0);
23609 /* When unrolling for chips that reorder memory reads and writes,
23610 we can save registers by using a single temporary.
23611 Also, using 4 temporaries is overkill in 32-bit mode. */
23612 if (!TARGET_64BIT && 0)
23614 for (i = 0; i < unroll; i++)
23616 if (i)
23618 destmem =
23619 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23620 srcmem =
23621 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23623 emit_move_insn (destmem, srcmem);
23626 else
23628 rtx tmpreg[4];
23629 gcc_assert (unroll <= 4);
23630 for (i = 0; i < unroll; i++)
23632 tmpreg[i] = gen_reg_rtx (mode);
23633 if (i)
23635 srcmem =
23636 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23638 emit_move_insn (tmpreg[i], srcmem);
23640 for (i = 0; i < unroll; i++)
23642 if (i)
23644 destmem =
23645 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23647 emit_move_insn (destmem, tmpreg[i]);
23651 else
23652 for (i = 0; i < unroll; i++)
23654 if (i)
23655 destmem =
23656 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23657 emit_move_insn (destmem, value);
23660 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23661 true, OPTAB_LIB_WIDEN);
23662 if (tmp != iter)
23663 emit_move_insn (iter, tmp);
23665 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23666 true, top_label);
23667 if (expected_size != -1)
23669 expected_size /= GET_MODE_SIZE (mode) * unroll;
23670 if (expected_size == 0)
23671 predict_jump (0);
23672 else if (expected_size > REG_BR_PROB_BASE)
23673 predict_jump (REG_BR_PROB_BASE - 1);
23674 else
23675 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23677 else
23678 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23679 iter = ix86_zero_extend_to_Pmode (iter);
23680 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23681 true, OPTAB_LIB_WIDEN);
23682 if (tmp != destptr)
23683 emit_move_insn (destptr, tmp);
23684 if (!issetmem)
23686 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23687 true, OPTAB_LIB_WIDEN);
23688 if (tmp != srcptr)
23689 emit_move_insn (srcptr, tmp);
23691 emit_label (out_label);
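/* Illustrative sketch (editorial addition, not part of the original
   sources) of the loop emitted above for a memcpy with MODE == SImode
   and UNROLL == 4:

     size = count & ~15;  iter = 0;
     do {
       copy 16 bytes from src + iter to dest + iter (four SImode moves
       through temporaries);
       iter += 16;
     } while (iter < size);
     dest += iter;  src += iter;

   The remaining count & 15 bytes are left for the epilogue code.  */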
23694 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23695 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23696 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23697 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23698 ORIG_VALUE is the original value passed to memset to fill the memory with.
23699 Other arguments have same meaning as for previous function. */
23701 static void
23702 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23703 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23704 rtx count,
23705 machine_mode mode, bool issetmem)
23707 rtx destexp;
23708 rtx srcexp;
23709 rtx countreg;
23710 HOST_WIDE_INT rounded_count;
23712 /* If possible, it is shorter to use rep movs.
23713 TODO: Maybe it is better to move this logic to decide_alg. */
23714 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23715 && (!issetmem || orig_value == const0_rtx))
23716 mode = SImode;
23718 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23719 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23721 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23722 GET_MODE_SIZE (mode)));
23723 if (mode != QImode)
23725 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23726 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23727 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23729 else
23730 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23731 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23733 rounded_count = (INTVAL (count)
23734 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23735 destmem = shallow_copy_rtx (destmem);
23736 set_mem_size (destmem, rounded_count);
23738 else if (MEM_SIZE_KNOWN_P (destmem))
23739 clear_mem_size (destmem);
23741 if (issetmem)
23743 value = force_reg (mode, gen_lowpart (mode, value));
23744 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23746 else
23748 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23749 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23750 if (mode != QImode)
23752 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23753 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23754 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23756 else
23757 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23758 if (CONST_INT_P (count))
23760 rounded_count = (INTVAL (count)
23761 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23762 srcmem = shallow_copy_rtx (srcmem);
23763 set_mem_size (srcmem, rounded_count);
23765 else
23767 if (MEM_SIZE_KNOWN_P (srcmem))
23768 clear_mem_size (srcmem);
23770 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23771 destexp, srcexp));
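/* Worked example (editorial addition): clearing a constant 40-byte
   block with this helper widens QImode to SImode (the count is a
   multiple of 4 and the fill value is zero), scales the count down to
   10 and emits a single "rep stosl"; the rounded size 40 is recorded
   on the destination MEM for the alias/size machinery.  */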
23775 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23776 DESTMEM.
23777 SRCMEM is passed by pointer so it can be updated on return.
23778 The return value is the updated DESTMEM. */
23779 static rtx
23780 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23781 HOST_WIDE_INT size_to_move)
23783 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23784 enum insn_code code;
23785 machine_mode move_mode;
23786 int piece_size, i;
23788 /* Find the widest mode in which we could perform moves.
23789 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23790 it until move of such size is supported. */
23791 piece_size = 1 << floor_log2 (size_to_move);
23792 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23793 code = optab_handler (mov_optab, move_mode);
23794 while (code == CODE_FOR_nothing && piece_size > 1)
23796 piece_size >>= 1;
23797 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23798 code = optab_handler (mov_optab, move_mode);
23801 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23802 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23803 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23805 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23806 move_mode = mode_for_vector (word_mode, nunits);
23807 code = optab_handler (mov_optab, move_mode);
23808 if (code == CODE_FOR_nothing)
23810 move_mode = word_mode;
23811 piece_size = GET_MODE_SIZE (move_mode);
23812 code = optab_handler (mov_optab, move_mode);
23815 gcc_assert (code != CODE_FOR_nothing);
23817 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23818 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23820 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23821 gcc_assert (size_to_move % piece_size == 0);
23822 adjust = GEN_INT (piece_size);
23823 for (i = 0; i < size_to_move; i += piece_size)
23825 /* We move from memory to memory, so we'll need to do it via
23826 a temporary register. */
23827 tempreg = gen_reg_rtx (move_mode);
23828 emit_insn (GEN_FCN (code) (tempreg, src));
23829 emit_insn (GEN_FCN (code) (dst, tempreg));
23831 emit_move_insn (destptr,
23832 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23833 emit_move_insn (srcptr,
23834 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23836 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23837 piece_size);
23838 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23839 piece_size);
23842 /* Update DST and SRC rtx. */
23843 *srcmem = src;
23844 return dst;
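/* Illustrative example (editorial addition): with SIZE_TO_MOVE == 16
   and SSE enabled the routine above selects a 16-byte move mode (a
   vector mode once the size exceeds the word size), copies the block
   through a temporary register with one load and one store, and then
   advances both DESTPTR and SRCPTR by 16 so the returned MEM stays in
   sync with the pointers.  */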
23847 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23848 static void
23849 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23850 rtx destptr, rtx srcptr, rtx count, int max_size)
23852 rtx src, dest;
23853 if (CONST_INT_P (count))
23855 HOST_WIDE_INT countval = INTVAL (count);
23856 HOST_WIDE_INT epilogue_size = countval % max_size;
23857 int i;
23859 /* For now MAX_SIZE should be a power of 2. This assert could be
23860 relaxed, but it'll require a bit more complicated epilogue
23861 expanding. */
23862 gcc_assert ((max_size & (max_size - 1)) == 0);
23863 for (i = max_size; i >= 1; i >>= 1)
23865 if (epilogue_size & i)
23866 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23868 return;
23870 if (max_size > 8)
23872 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23873 count, 1, OPTAB_DIRECT);
23874 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23875 count, QImode, 1, 4, false);
23876 return;
23879 /* When there are stringops, we can cheaply increase dest and src pointers.
23880 Otherwise we save code size by maintaining offset (zero is readily
23881 available from preceding rep operation) and using x86 addressing modes.
23883 if (TARGET_SINGLE_STRINGOP)
23885 if (max_size > 4)
23887 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23888 src = change_address (srcmem, SImode, srcptr);
23889 dest = change_address (destmem, SImode, destptr);
23890 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23891 emit_label (label);
23892 LABEL_NUSES (label) = 1;
23894 if (max_size > 2)
23896 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23897 src = change_address (srcmem, HImode, srcptr);
23898 dest = change_address (destmem, HImode, destptr);
23899 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23900 emit_label (label);
23901 LABEL_NUSES (label) = 1;
23903 if (max_size > 1)
23905 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23906 src = change_address (srcmem, QImode, srcptr);
23907 dest = change_address (destmem, QImode, destptr);
23908 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23909 emit_label (label);
23910 LABEL_NUSES (label) = 1;
23913 else
23915 rtx offset = force_reg (Pmode, const0_rtx);
23916 rtx tmp;
23918 if (max_size > 4)
23920 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23921 src = change_address (srcmem, SImode, srcptr);
23922 dest = change_address (destmem, SImode, destptr);
23923 emit_move_insn (dest, src);
23924 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23925 true, OPTAB_LIB_WIDEN);
23926 if (tmp != offset)
23927 emit_move_insn (offset, tmp);
23928 emit_label (label);
23929 LABEL_NUSES (label) = 1;
23931 if (max_size > 2)
23933 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23934 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23935 src = change_address (srcmem, HImode, tmp);
23936 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23937 dest = change_address (destmem, HImode, tmp);
23938 emit_move_insn (dest, src);
23939 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23940 true, OPTAB_LIB_WIDEN);
23941 if (tmp != offset)
23942 emit_move_insn (offset, tmp);
23943 emit_label (label);
23944 LABEL_NUSES (label) = 1;
23946 if (max_size > 1)
23948 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23949 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23950 src = change_address (srcmem, QImode, tmp);
23951 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23952 dest = change_address (destmem, QImode, tmp);
23953 emit_move_insn (dest, src);
23954 emit_label (label);
23955 LABEL_NUSES (label) = 1;
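/* Worked example (editorial addition) for the constant-count path
   above: a trailing count of 7 bytes with MAX_SIZE == 8 expands into a
   4-byte, a 2-byte and a 1-byte move (7 = 4 + 2 + 1) via emit_memmov,
   with no runtime alignment tests at all.  */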
23960 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23961 with value PROMOTED_VAL.
23962 Unlike emit_memmov there is no source operand to update here.
23963 The return value is the updated DESTMEM. */
23964 static rtx
23965 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23966 HOST_WIDE_INT size_to_move)
23968 rtx dst = destmem, adjust;
23969 enum insn_code code;
23970 machine_mode move_mode;
23971 int piece_size, i;
23973 /* Find the widest mode in which we could perform moves.
23974 Start with the biggest power of 2 less than SIZE_TO_MOVE and half
23975 it until move of such size is supported. */
23976 move_mode = GET_MODE (promoted_val);
23977 if (move_mode == VOIDmode)
23978 move_mode = QImode;
23979 if (size_to_move < GET_MODE_SIZE (move_mode))
23981 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23982 promoted_val = gen_lowpart (move_mode, promoted_val);
23984 piece_size = GET_MODE_SIZE (move_mode);
23985 code = optab_handler (mov_optab, move_mode);
23986 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23988 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23990 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */
23991 gcc_assert (size_to_move % piece_size == 0);
23992 adjust = GEN_INT (piece_size);
23993 for (i = 0; i < size_to_move; i += piece_size)
23995 if (piece_size <= GET_MODE_SIZE (word_mode))
23997 emit_insn (gen_strset (destptr, dst, promoted_val));
23998 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23999 piece_size);
24000 continue;
24003 emit_insn (GEN_FCN (code) (dst, promoted_val));
24005 emit_move_insn (destptr,
24006 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
24008 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24009 piece_size);
24012 /* Update DST rtx. */
24013 return dst;
24015 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
24016 static void
24017 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
24018 rtx count, int max_size)
24020 count =
24021 expand_simple_binop (counter_mode (count), AND, count,
24022 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
24023 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
24024 gen_lowpart (QImode, value), count, QImode,
24025 1, max_size / 2, true);
24028 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
24029 static void
24030 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
24031 rtx count, int max_size)
24033 rtx dest;
24035 if (CONST_INT_P (count))
24037 HOST_WIDE_INT countval = INTVAL (count);
24038 HOST_WIDE_INT epilogue_size = countval % max_size;
24039 int i;
24041 /* For now MAX_SIZE should be a power of 2. This assert could be
24042 relaxed, but it'll require a bit more complicated epilogue
24043 expanding. */
24044 gcc_assert ((max_size & (max_size - 1)) == 0);
24045 for (i = max_size; i >= 1; i >>= 1)
24047 if (epilogue_size & i)
24049 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24050 destmem = emit_memset (destmem, destptr, vec_value, i);
24051 else
24052 destmem = emit_memset (destmem, destptr, value, i);
24055 return;
24057 if (max_size > 32)
24059 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
24060 return;
24062 if (max_size > 16)
24064 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
24065 if (TARGET_64BIT)
24067 dest = change_address (destmem, DImode, destptr);
24068 emit_insn (gen_strset (destptr, dest, value));
24069 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
24070 emit_insn (gen_strset (destptr, dest, value));
24072 else
24074 dest = change_address (destmem, SImode, destptr);
24075 emit_insn (gen_strset (destptr, dest, value));
24076 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24077 emit_insn (gen_strset (destptr, dest, value));
24078 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
24079 emit_insn (gen_strset (destptr, dest, value));
24080 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24081 emit_insn (gen_strset (destptr, dest, value));
24083 emit_label (label);
24084 LABEL_NUSES (label) = 1;
24086 if (max_size > 8)
24088 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24089 if (TARGET_64BIT)
24091 dest = change_address (destmem, DImode, destptr);
24092 emit_insn (gen_strset (destptr, dest, value));
24094 else
24096 dest = change_address (destmem, SImode, destptr);
24097 emit_insn (gen_strset (destptr, dest, value));
24098 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24099 emit_insn (gen_strset (destptr, dest, value));
24101 emit_label (label);
24102 LABEL_NUSES (label) = 1;
24104 if (max_size > 4)
24106 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24107 dest = change_address (destmem, SImode, destptr);
24108 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24109 emit_label (label);
24110 LABEL_NUSES (label) = 1;
24112 if (max_size > 2)
24114 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24115 dest = change_address (destmem, HImode, destptr);
24116 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24117 emit_label (label);
24118 LABEL_NUSES (label) = 1;
24120 if (max_size > 1)
24122 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24123 dest = change_address (destmem, QImode, destptr);
24124 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24125 emit_label (label);
24126 LABEL_NUSES (label) = 1;
24130 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
24131 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
24132 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
24133 ignored.
24134 Return value is updated DESTMEM. */
24135 static rtx
24136 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24137 rtx destptr, rtx srcptr, rtx value,
24138 rtx vec_value, rtx count, int align,
24139 int desired_alignment, bool issetmem)
24141 int i;
24142 for (i = 1; i < desired_alignment; i <<= 1)
24144 if (align <= i)
24146 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24147 if (issetmem)
24149 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24150 destmem = emit_memset (destmem, destptr, vec_value, i);
24151 else
24152 destmem = emit_memset (destmem, destptr, value, i);
24154 else
24155 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24156 ix86_adjust_counter (count, i);
24157 emit_label (label);
24158 LABEL_NUSES (label) = 1;
24159 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24162 return destmem;
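/* Illustrative example (editorial addition, not part of the original
   sources): with ALIGN == 1 and DESIRED_ALIGNMENT == 16 the prologue
   above emits alignment tests on DESTPTR for the 1-, 2-, 4- and 8-byte
   bits; each test conditionally copies (or stores) that many bytes,
   decreases COUNT accordingly and raises the recorded alignment of
   DESTMEM, so the main loop then runs on a 16-byte aligned
   destination.  */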
24165 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
24166 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
24167 and jump to DONE_LABEL. */
24168 static void
24169 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24170 rtx destptr, rtx srcptr,
24171 rtx value, rtx vec_value,
24172 rtx count, int size,
24173 rtx done_label, bool issetmem)
24175 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24176 machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24177 rtx modesize;
24178 int n;
24180 /* If we do not have a vector value to copy, we must reduce the size. */
24181 if (issetmem)
24183 if (!vec_value)
24185 if (GET_MODE (value) == VOIDmode && size > 8)
24186 mode = Pmode;
24187 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24188 mode = GET_MODE (value);
24190 else
24191 mode = GET_MODE (vec_value), value = vec_value;
24193 else
24195 /* Choose appropriate vector mode. */
24196 if (size >= 32)
24197 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24198 else if (size >= 16)
24199 mode = TARGET_SSE ? V16QImode : DImode;
24200 srcmem = change_address (srcmem, mode, srcptr);
24202 destmem = change_address (destmem, mode, destptr);
24203 modesize = GEN_INT (GET_MODE_SIZE (mode));
24204 gcc_assert (GET_MODE_SIZE (mode) <= size);
24205 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24207 if (issetmem)
24208 emit_move_insn (destmem, gen_lowpart (mode, value));
24209 else
24211 emit_move_insn (destmem, srcmem);
24212 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24214 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24217 destmem = offset_address (destmem, count, 1);
24218 destmem = offset_address (destmem, GEN_INT (-2 * size),
24219 GET_MODE_SIZE (mode));
24220 if (!issetmem)
24222 srcmem = offset_address (srcmem, count, 1);
24223 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24224 GET_MODE_SIZE (mode));
24226 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24228 if (issetmem)
24229 emit_move_insn (destmem, gen_lowpart (mode, value));
24230 else
24232 emit_move_insn (destmem, srcmem);
24233 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24235 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24237 emit_jump_insn (gen_jump (done_label));
24238 emit_barrier ();
24240 emit_label (label);
24241 LABEL_NUSES (label) = 1;
24244 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
24245 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
24246 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
24247 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
24248 DONE_LABEL is a label after the whole copying sequence. The label is created
24249 on demand if *DONE_LABEL is NULL.
24250 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for new
24251 bounds after the initial copies.
24253 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
24254 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
24255 we will dispatch to a library call for large blocks.
24257 In pseudocode we do:
24259 if (COUNT < SIZE)
24261 Assume that SIZE is 4. Bigger sizes are handled analogously
24262 if (COUNT & 4)
24264 copy 4 bytes from SRCPTR to DESTPTR
24265 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24266 goto done_label
24268 if (!COUNT)
24269 goto done_label;
24270 copy 1 byte from SRCPTR to DESTPTR
24271 if (COUNT & 2)
24273 copy 2 bytes from SRCPTR to DESTPTR
24274 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24277 else
24279 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24280 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24282 OLD_DESTPTR = DESTPTR;
24283 Align DESTPTR up to DESIRED_ALIGN
24284 SRCPTR += DESTPTR - OLD_DESTPTR
24285 COUNT -= DESTPTR - OLD_DESTPTR
24286 if (DYNAMIC_CHECK)
24287 Round COUNT down to multiple of SIZE
24288 << optional caller supplied zero size guard is here >>
24289 << optional caller supplied dynamic check is here >>
24290 << caller supplied main copy loop is here >>
24292 done_label:
24294 static void
24295 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24296 rtx *destptr, rtx *srcptr,
24297 machine_mode mode,
24298 rtx value, rtx vec_value,
24299 rtx *count,
24300 rtx_code_label **done_label,
24301 int size,
24302 int desired_align,
24303 int align,
24304 unsigned HOST_WIDE_INT *min_size,
24305 bool dynamic_check,
24306 bool issetmem)
24308 rtx_code_label *loop_label = NULL, *label;
24309 int n;
24310 rtx modesize;
24311 int prolog_size = 0;
24312 rtx mode_value;
24314 /* Choose the proper value to copy. */
24315 if (issetmem && VECTOR_MODE_P (mode))
24316 mode_value = vec_value;
24317 else
24318 mode_value = value;
24319 gcc_assert (GET_MODE_SIZE (mode) <= size);
24321 /* See if block is big or small, handle small blocks. */
24322 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24324 int size2 = size;
24325 loop_label = gen_label_rtx ();
24327 if (!*done_label)
24328 *done_label = gen_label_rtx ();
24330 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24331 1, loop_label);
24332 size2 >>= 1;
24334 /* Handle sizes > 3. */
24335 for (;size2 > 2; size2 >>= 1)
24336 expand_small_movmem_or_setmem (destmem, srcmem,
24337 *destptr, *srcptr,
24338 value, vec_value,
24339 *count,
24340 size2, *done_label, issetmem);
24341 /* Nothing to copy? Jump to DONE_LABEL if so */
24342 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24343 1, *done_label);
24345 /* Do a byte copy. */
24346 destmem = change_address (destmem, QImode, *destptr);
24347 if (issetmem)
24348 emit_move_insn (destmem, gen_lowpart (QImode, value));
24349 else
24351 srcmem = change_address (srcmem, QImode, *srcptr);
24352 emit_move_insn (destmem, srcmem);
24355 /* Handle sizes 2 and 3. */
24356 label = ix86_expand_aligntest (*count, 2, false);
24357 destmem = change_address (destmem, HImode, *destptr);
24358 destmem = offset_address (destmem, *count, 1);
24359 destmem = offset_address (destmem, GEN_INT (-2), 2);
24360 if (issetmem)
24361 emit_move_insn (destmem, gen_lowpart (HImode, value));
24362 else
24364 srcmem = change_address (srcmem, HImode, *srcptr);
24365 srcmem = offset_address (srcmem, *count, 1);
24366 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24367 emit_move_insn (destmem, srcmem);
24370 emit_label (label);
24371 LABEL_NUSES (label) = 1;
24372 emit_jump_insn (gen_jump (*done_label));
24373 emit_barrier ();
24375 else
24376 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24377 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24379 /* Start memcpy for COUNT >= SIZE. */
24380 if (loop_label)
24382 emit_label (loop_label);
24383 LABEL_NUSES (loop_label) = 1;
24386 /* Copy first desired_align bytes. */
24387 if (!issetmem)
24388 srcmem = change_address (srcmem, mode, *srcptr);
24389 destmem = change_address (destmem, mode, *destptr);
24390 modesize = GEN_INT (GET_MODE_SIZE (mode));
24391 for (n = 0; prolog_size < desired_align - align; n++)
24393 if (issetmem)
24394 emit_move_insn (destmem, mode_value);
24395 else
24397 emit_move_insn (destmem, srcmem);
24398 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24400 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24401 prolog_size += GET_MODE_SIZE (mode);
24405 /* Copy last SIZE bytes. */
24406 destmem = offset_address (destmem, *count, 1);
24407 destmem = offset_address (destmem,
24408 GEN_INT (-size - prolog_size),
24410 if (issetmem)
24411 emit_move_insn (destmem, mode_value);
24412 else
24414 srcmem = offset_address (srcmem, *count, 1);
24415 srcmem = offset_address (srcmem,
24416 GEN_INT (-size - prolog_size),
24418 emit_move_insn (destmem, srcmem);
24420 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24422 destmem = offset_address (destmem, modesize, 1);
24423 if (issetmem)
24424 emit_move_insn (destmem, mode_value);
24425 else
24427 srcmem = offset_address (srcmem, modesize, 1);
24428 emit_move_insn (destmem, srcmem);
24432 /* Align destination. */
24433 if (desired_align > 1 && desired_align > align)
24435 rtx saveddest = *destptr;
24437 gcc_assert (desired_align <= size);
24438 /* Align destptr up, place it to new register. */
24439 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24440 GEN_INT (prolog_size),
24441 NULL_RTX, 1, OPTAB_DIRECT);
24442 if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24443 REG_POINTER (*destptr) = 1;
24444 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24445 GEN_INT (-desired_align),
24446 *destptr, 1, OPTAB_DIRECT);
24447 /* See how many bytes we skipped. */
24448 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24449 *destptr,
24450 saveddest, 1, OPTAB_DIRECT);
24451 /* Adjust srcptr and count. */
24452 if (!issetmem)
24453 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24454 saveddest, *srcptr, 1, OPTAB_DIRECT);
24455 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24456 saveddest, *count, 1, OPTAB_DIRECT);
24457 /* We copied at most size + prolog_size. */
24458 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24459 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24460 else
24461 *min_size = 0;
24463 /* Our loops always round down the block size, but for dispatch to a library
24464 call we need the precise value. */
24465 if (dynamic_check)
24466 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
24467 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24469 else
24471 gcc_assert (prolog_size == 0);
24472 /* Decrease count, so we won't end up copying last word twice. */
24473 if (!CONST_INT_P (*count))
24474 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24475 constm1_rtx, *count, 1, OPTAB_DIRECT);
24476 else
24477 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24478 if (*min_size)
24479 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24484 /* This function is like the previous one, except here we know how many bytes
24485 need to be copied. That allows us to update alignment not only of DST, which
24486 is returned, but also of SRC, which is passed as a pointer for that
24487 reason. */
24488 static rtx
24489 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24490 rtx srcreg, rtx value, rtx vec_value,
24491 int desired_align, int align_bytes,
24492 bool issetmem)
24494 rtx src = NULL;
24495 rtx orig_dst = dst;
24496 rtx orig_src = NULL;
24497 int piece_size = 1;
24498 int copied_bytes = 0;
24500 if (!issetmem)
24502 gcc_assert (srcp != NULL);
24503 src = *srcp;
24504 orig_src = src;
24507 for (piece_size = 1;
24508 piece_size <= desired_align && copied_bytes < align_bytes;
24509 piece_size <<= 1)
24511 if (align_bytes & piece_size)
24513 if (issetmem)
24515 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24516 dst = emit_memset (dst, destreg, vec_value, piece_size);
24517 else
24518 dst = emit_memset (dst, destreg, value, piece_size);
24520 else
24521 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24522 copied_bytes += piece_size;
24525 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24526 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24527 if (MEM_SIZE_KNOWN_P (orig_dst))
24528 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24530 if (!issetmem)
24532 int src_align_bytes = get_mem_align_offset (src, desired_align
24533 * BITS_PER_UNIT);
24534 if (src_align_bytes >= 0)
24535 src_align_bytes = desired_align - src_align_bytes;
24536 if (src_align_bytes >= 0)
24538 unsigned int src_align;
24539 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24541 if ((src_align_bytes & (src_align - 1))
24542 == (align_bytes & (src_align - 1)))
24543 break;
24545 if (src_align > (unsigned int) desired_align)
24546 src_align = desired_align;
24547 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24548 set_mem_align (src, src_align * BITS_PER_UNIT);
24550 if (MEM_SIZE_KNOWN_P (orig_src))
24551 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24552 *srcp = src;
24555 return dst;
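/* For example, with DESIRED_ALIGN = 16 and ALIGN_BYTES = 11 the loop above
   emits a 1-byte, a 2-byte and an 8-byte piece (11 = 1 + 2 + 8), after which
   DST is 16-byte aligned and its recorded alignment and size are updated
   accordingly.  */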
24558 /* Return true if ALG can be used in current context.
24559 Assume we expand memset if MEMSET is true. */
24560 static bool
24561 alg_usable_p (enum stringop_alg alg, bool memset)
24563 if (alg == no_stringop)
24564 return false;
24565 if (alg == vector_loop)
24566 return TARGET_SSE || TARGET_AVX;
24567 /* Algorithms using the rep prefix want at least edi and ecx;
24568 additionally, memset wants eax and memcpy wants esi. Don't
24569 consider such algorithms if the user has appropriated those
24570 registers for their own purposes. */
24571 if (alg == rep_prefix_1_byte
24572 || alg == rep_prefix_4_byte
24573 || alg == rep_prefix_8_byte)
24574 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24575 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24576 return true;
24579 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
24580 static enum stringop_alg
24581 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24582 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24583 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24585 const struct stringop_algs * algs;
24586 bool optimize_for_speed;
24587 int max = 0;
24588 const struct processor_costs *cost;
24589 int i;
24590 bool any_alg_usable_p = false;
24592 *noalign = false;
24593 *dynamic_check = -1;
24595 /* Even if the string operation call is cold, we still might spend a lot
24596 of time processing large blocks. */
24597 if (optimize_function_for_size_p (cfun)
24598 || (optimize_insn_for_size_p ()
24599 && (max_size < 256
24600 || (expected_size != -1 && expected_size < 256))))
24601 optimize_for_speed = false;
24602 else
24603 optimize_for_speed = true;
24605 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24606 if (memset)
24607 algs = &cost->memset[TARGET_64BIT != 0];
24608 else
24609 algs = &cost->memcpy[TARGET_64BIT != 0];
24611 /* See maximal size for user defined algorithm. */
24612 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24614 enum stringop_alg candidate = algs->size[i].alg;
24615 bool usable = alg_usable_p (candidate, memset);
24616 any_alg_usable_p |= usable;
24618 if (candidate != libcall && candidate && usable)
24619 max = algs->size[i].max;
24622 /* If the expected size is not known but the max size is small enough
24623 that the inline version is a win, set the expected size into
24624 the range. */
24625 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24626 && expected_size == -1)
24627 expected_size = min_size / 2 + max_size / 2;
24629 /* If the user specified the algorithm, honor it if possible. */
24630 if (ix86_stringop_alg != no_stringop
24631 && alg_usable_p (ix86_stringop_alg, memset))
24632 return ix86_stringop_alg;
24633 /* rep; movq or rep; movl is the smallest variant. */
24634 else if (!optimize_for_speed)
24636 *noalign = true;
24637 if (!count || (count & 3) || (memset && !zero_memset))
24638 return alg_usable_p (rep_prefix_1_byte, memset)
24639 ? rep_prefix_1_byte : loop_1_byte;
24640 else
24641 return alg_usable_p (rep_prefix_4_byte, memset)
24642 ? rep_prefix_4_byte : loop;
24644 /* Very tiny blocks are best handled via the loop; REP is expensive to
24645 set up. */
24646 else if (expected_size != -1 && expected_size < 4)
24647 return loop_1_byte;
24648 else if (expected_size != -1)
24650 enum stringop_alg alg = libcall;
24651 bool alg_noalign = false;
24652 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24654 /* We get here if the algorithms that were not libcall-based
24655 were rep-prefix based and we are unable to use rep prefixes
24656 based on global register usage. Break out of the loop and
24657 use the heuristic below. */
24658 if (algs->size[i].max == 0)
24659 break;
24660 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24662 enum stringop_alg candidate = algs->size[i].alg;
24664 if (candidate != libcall && alg_usable_p (candidate, memset))
24666 alg = candidate;
24667 alg_noalign = algs->size[i].noalign;
24669 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24670 last non-libcall inline algorithm. */
24671 if (TARGET_INLINE_ALL_STRINGOPS)
24673 /* When the current size is best copied by a libcall, but we are
24674 still forced to inline, run the heuristic below that picks
24675 code for medium-sized blocks. */
24676 if (alg != libcall)
24678 *noalign = alg_noalign;
24679 return alg;
24681 else if (!any_alg_usable_p)
24682 break;
24684 else if (alg_usable_p (candidate, memset))
24686 *noalign = algs->size[i].noalign;
24687 return candidate;
24692 /* When asked to inline the call anyway, try to pick a meaningful choice.
24693 We look for the maximal size of block that is faster to copy by hand and
24694 take blocks of at most that size, guessing that the average size will
24695 be roughly half of the block.
24697 If this turns out to be bad, we might simply specify the preferred
24698 choice in ix86_costs. */
24699 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24700 && (algs->unknown_size == libcall
24701 || !alg_usable_p (algs->unknown_size, memset)))
24703 enum stringop_alg alg;
24705 /* If there aren't any usable algorithms, then recursing on
24706 smaller sizes isn't going to find anything. Just return the
24707 simple byte-at-a-time copy loop. */
24708 if (!any_alg_usable_p)
24710 /* Pick something reasonable. */
24711 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24712 *dynamic_check = 128;
24713 return loop_1_byte;
24715 if (max <= 0)
24716 max = 4096;
24717 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24718 zero_memset, dynamic_check, noalign);
24719 gcc_assert (*dynamic_check == -1);
24720 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24721 *dynamic_check = max;
24722 else
24723 gcc_assert (alg != libcall);
24724 return alg;
24726 return (alg_usable_p (algs->unknown_size, memset)
24727 ? algs->unknown_size : libcall);
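/* For instance, when optimizing for size and no algorithm was forced on the
   command line, a memcpy whose count is unknown or not a multiple of four is
   expanded as a bare rep movsb (rep_prefix_1_byte) with *noalign set, while
   a known count that is a multiple of four uses rep movsl
   (rep_prefix_4_byte) instead, falling back to the loop variants when the
   rep-prefix registers are unavailable.  */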
24730 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24731 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24732 static int
24733 decide_alignment (int align,
24734 enum stringop_alg alg,
24735 int expected_size,
24736 machine_mode move_mode)
24738 int desired_align = 0;
24740 gcc_assert (alg != no_stringop);
24742 if (alg == libcall)
24743 return 0;
24744 if (move_mode == VOIDmode)
24745 return 0;
24747 desired_align = GET_MODE_SIZE (move_mode);
24748 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
24749 copying a whole cache line at once. */
24750 if (TARGET_PENTIUMPRO
24751 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24752 desired_align = 8;
24754 if (optimize_size)
24755 desired_align = 1;
24756 if (desired_align < align)
24757 desired_align = align;
24758 if (expected_size != -1 && expected_size < 4)
24759 desired_align = align;
24761 return desired_align;
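/* For example, a vector_loop moving V16QImode chunks asks for 16-byte
   alignment, the rep_prefix_4_byte and rep_prefix_1_byte algorithms on
   PentiumPro ask for 8, and when optimizing for size the desired alignment
   drops to 1; in all cases it is never lowered below the alignment ALIGN
   that is already known.  */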
24765 /* Helper function for memset. For QImode value 0xXY produce
24766 0xXYXYXYXY of the width specified by MODE. This is essentially
24767 a multiplication by 0x01010101, but we can do slightly better than
24768 synth_mult by unwinding the sequence by hand on CPUs with
24769 slow multiply. */
24770 static rtx
24771 promote_duplicated_reg (machine_mode mode, rtx val)
24773 machine_mode valmode = GET_MODE (val);
24774 rtx tmp;
24775 int nops = mode == DImode ? 3 : 2;
24777 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24778 if (val == const0_rtx)
24779 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24780 if (CONST_INT_P (val))
24782 HOST_WIDE_INT v = INTVAL (val) & 255;
24784 v |= v << 8;
24785 v |= v << 16;
24786 if (mode == DImode)
24787 v |= (v << 16) << 16;
24788 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24791 if (valmode == VOIDmode)
24792 valmode = QImode;
24793 if (valmode != QImode)
24794 val = gen_lowpart (QImode, val);
24795 if (mode == QImode)
24796 return val;
24797 if (!TARGET_PARTIAL_REG_STALL)
24798 nops--;
24799 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24800 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24801 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24802 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24804 rtx reg = convert_modes (mode, QImode, val, true);
24805 tmp = promote_duplicated_reg (mode, const1_rtx);
24806 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24807 OPTAB_DIRECT);
24809 else
24811 rtx reg = convert_modes (mode, QImode, val, true);
24813 if (!TARGET_PARTIAL_REG_STALL)
24814 if (mode == SImode)
24815 emit_insn (gen_insvsi_1 (reg, reg));
24816 else
24817 emit_insn (gen_insvdi_1 (reg, reg));
24818 else
24820 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24821 NULL, 1, OPTAB_DIRECT);
24822 reg =
24823 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24825 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24826 NULL, 1, OPTAB_DIRECT);
24827 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24828 if (mode == SImode)
24829 return reg;
24830 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24831 NULL, 1, OPTAB_DIRECT);
24832 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24833 return reg;
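/* For example, promoting the constant byte 0xAB to SImode yields 0xABABABAB
   directly via the constant path above; for a non-constant byte the value is
   either multiplied by the promoted 0x01010101 pattern or built with the
   insv / shift-and-or sequence, whichever the cost tables prefer.  A minimal
   C sketch of the splat being generated (illustrative only, not part of this
   file):

     static unsigned int splat4 (unsigned char x)
     {
       unsigned int v = x;
       v |= v << 8;
       v |= v << 16;
       return v;
     }
   */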
24837 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24838 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24839 raising alignment from ALIGN to DESIRED_ALIGN. */
24840 static rtx
24841 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24842 int align)
24844 rtx promoted_val;
24846 if (TARGET_64BIT
24847 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24848 promoted_val = promote_duplicated_reg (DImode, val);
24849 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24850 promoted_val = promote_duplicated_reg (SImode, val);
24851 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24852 promoted_val = promote_duplicated_reg (HImode, val);
24853 else
24854 promoted_val = val;
24856 return promoted_val;
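/* So a 64-bit memset whose main loop stores 8-byte chunks promotes VAL to
   DImode, a loop storing 4-byte chunks needs only SImode, a 2-byte epilogue
   needs HImode, and a pure byte copy keeps VAL unpromoted.  */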
24859 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24860 operations when profitable. The code depends upon architecture, block size
24861 and alignment, but always has one of the following overall structures:
24863 Aligned move sequence:
24865 1) Prologue guard: Conditional that jumps up to epilogues for small
24866 blocks that can be handled by the epilogue alone. This is faster
24867 but also needed for correctness, since the prologue assumes the block
24868 is larger than the desired alignment.
24870 Optional dynamic check for size and libcall for large
24871 blocks is emitted here too, with -minline-stringops-dynamically.
24873 2) Prologue: copy first few bytes in order to get destination
24874 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24875 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24876 copied. We emit either a jump tree on power of two sized
24877 blocks, or a byte loop.
24879 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24880 with specified algorithm.
24882 4) Epilogue: code copying tail of the block that is too small to be
24883 handled by main body (or up to size guarded by prologue guard).
24885 Misaligned move sequence
24887 1) Misaligned move prologue/epilogue containing:
24888 a) Prologue handling small memory blocks and jumping to done_label
24889 (skipped if blocks are known to be large enough)
24890 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24891 needed by single possibly misaligned move
24892 (skipped if alignment is not needed)
24893 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24895 2) Zero size guard dispatching to done_label, if needed
24897 3) Dispatch to a library call, if needed,
24899 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24900 with specified algorithm. */
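/* As a rough illustration (assuming a 64-bit target where decide_alg picked
   unrolled_loop): the main loop then moves 4 * 8 = 32 bytes per iteration,
   the prologue guard branches straight to the epilogue for blocks smaller
   than roughly 32 bytes, the prologue copies at most DESIRED_ALIGN - ALIGN
   bytes to align the destination, and the epilogue finishes whatever is left
   after the last full 32-byte iteration.  */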
24901 bool
24902 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24903 rtx align_exp, rtx expected_align_exp,
24904 rtx expected_size_exp, rtx min_size_exp,
24905 rtx max_size_exp, rtx probable_max_size_exp,
24906 bool issetmem)
24908 rtx destreg;
24909 rtx srcreg = NULL;
24910 rtx_code_label *label = NULL;
24911 rtx tmp;
24912 rtx_code_label *jump_around_label = NULL;
24913 HOST_WIDE_INT align = 1;
24914 unsigned HOST_WIDE_INT count = 0;
24915 HOST_WIDE_INT expected_size = -1;
24916 int size_needed = 0, epilogue_size_needed;
24917 int desired_align = 0, align_bytes = 0;
24918 enum stringop_alg alg;
24919 rtx promoted_val = NULL;
24920 rtx vec_promoted_val = NULL;
24921 bool force_loopy_epilogue = false;
24922 int dynamic_check;
24923 bool need_zero_guard = false;
24924 bool noalign;
24925 machine_mode move_mode = VOIDmode;
24926 int unroll_factor = 1;
24927 /* TODO: Once value ranges are available, fill in proper data. */
24928 unsigned HOST_WIDE_INT min_size = 0;
24929 unsigned HOST_WIDE_INT max_size = -1;
24930 unsigned HOST_WIDE_INT probable_max_size = -1;
24931 bool misaligned_prologue_used = false;
24933 if (CONST_INT_P (align_exp))
24934 align = INTVAL (align_exp);
24935 /* i386 can do misaligned access at a reasonably increased cost. */
24936 if (CONST_INT_P (expected_align_exp)
24937 && INTVAL (expected_align_exp) > align)
24938 align = INTVAL (expected_align_exp);
24939 /* ALIGN is the minimum of destination and source alignment, but we care here
24940 just about destination alignment. */
24941 else if (!issetmem
24942 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24943 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24945 if (CONST_INT_P (count_exp))
24947 min_size = max_size = probable_max_size = count = expected_size
24948 = INTVAL (count_exp);
24949 /* When COUNT is 0, there is nothing to do. */
24950 if (!count)
24951 return true;
24953 else
24955 if (min_size_exp)
24956 min_size = INTVAL (min_size_exp);
24957 if (max_size_exp)
24958 max_size = INTVAL (max_size_exp);
24959 if (probable_max_size_exp)
24960 probable_max_size = INTVAL (probable_max_size_exp);
24961 if (CONST_INT_P (expected_size_exp))
24962 expected_size = INTVAL (expected_size_exp);
24965 /* Make sure we don't need to care about overflow later on. */
24966 if (count > (HOST_WIDE_INT_1U << 30))
24967 return false;
24969 /* Step 0: Decide on preferred algorithm, desired alignment and
24970 size of chunks to be copied by main loop. */
24971 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24972 issetmem,
24973 issetmem && val_exp == const0_rtx,
24974 &dynamic_check, &noalign);
24975 if (alg == libcall)
24976 return false;
24977 gcc_assert (alg != no_stringop);
24979 /* For now the vector version of memset is generated only for memory zeroing, as
24980 creating the promoted vector value is very cheap in this case. */
24981 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24982 alg = unrolled_loop;
24984 if (!count)
24985 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24986 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24987 if (!issetmem)
24988 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24990 unroll_factor = 1;
24991 move_mode = word_mode;
24992 switch (alg)
24994 case libcall:
24995 case no_stringop:
24996 case last_alg:
24997 gcc_unreachable ();
24998 case loop_1_byte:
24999 need_zero_guard = true;
25000 move_mode = QImode;
25001 break;
25002 case loop:
25003 need_zero_guard = true;
25004 break;
25005 case unrolled_loop:
25006 need_zero_guard = true;
25007 unroll_factor = (TARGET_64BIT ? 4 : 2);
25008 break;
25009 case vector_loop:
25010 need_zero_guard = true;
25011 unroll_factor = 4;
25012 /* Find the widest supported mode. */
25013 move_mode = word_mode;
25014 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
25015 != CODE_FOR_nothing)
25016 move_mode = GET_MODE_WIDER_MODE (move_mode);
25018 /* Find the corresponding vector mode with the same size as MOVE_MODE.
25019 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
25020 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25022 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25023 move_mode = mode_for_vector (word_mode, nunits);
25024 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
25025 move_mode = word_mode;
25027 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
25028 break;
25029 case rep_prefix_8_byte:
25030 move_mode = DImode;
25031 break;
25032 case rep_prefix_4_byte:
25033 move_mode = SImode;
25034 break;
25035 case rep_prefix_1_byte:
25036 move_mode = QImode;
25037 break;
25039 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
25040 epilogue_size_needed = size_needed;
25042 desired_align = decide_alignment (align, alg, expected_size, move_mode);
25043 if (!TARGET_ALIGN_STRINGOPS || noalign)
25044 align = desired_align;
25046 /* Step 1: Prologue guard. */
25048 /* Alignment code needs the count to be in a register. */
25049 if (CONST_INT_P (count_exp) && desired_align > align)
25051 if (INTVAL (count_exp) > desired_align
25052 && INTVAL (count_exp) > size_needed)
25054 align_bytes
25055 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
25056 if (align_bytes <= 0)
25057 align_bytes = 0;
25058 else
25059 align_bytes = desired_align - align_bytes;
25061 if (align_bytes == 0)
25062 count_exp = force_reg (counter_mode (count_exp), count_exp);
25064 gcc_assert (desired_align >= 1 && align >= 1);
25066 /* Misaligned move sequences handle both prologue and epilogue at once.
25067 Default code generation results in smaller code for large alignments
25068 and also avoids redundant work when sizes are known precisely. */
25069 misaligned_prologue_used
25070 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
25071 && MAX (desired_align, epilogue_size_needed) <= 32
25072 && desired_align <= epilogue_size_needed
25073 && ((desired_align > align && !align_bytes)
25074 || (!count && epilogue_size_needed > 1)));
25076 /* Do the cheap promotion to allow better CSE across the
25077 main loop and epilogue (i.e. one load of the big constant in
25078 front of all code).
25079 For now the misaligned move sequences do not have a fast path
25080 without broadcasting. */
25081 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25083 if (alg == vector_loop)
25085 gcc_assert (val_exp == const0_rtx);
25086 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25087 promoted_val = promote_duplicated_reg_to_size (val_exp,
25088 GET_MODE_SIZE (word_mode),
25089 desired_align, align);
25091 else
25093 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25094 desired_align, align);
25097 /* Misaligned move sequences handle both prologues and epilogues at once.
25098 Default code generation results in smaller code for large alignments and
25099 also avoids redundant work when sizes are known precisely. */
25100 if (misaligned_prologue_used)
25102 /* The misaligned move prologue handles small blocks by itself. */
25103 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25104 (dst, src, &destreg, &srcreg,
25105 move_mode, promoted_val, vec_promoted_val,
25106 &count_exp,
25107 &jump_around_label,
25108 desired_align < align
25109 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25110 desired_align, align, &min_size, dynamic_check, issetmem);
25111 if (!issetmem)
25112 src = change_address (src, BLKmode, srcreg);
25113 dst = change_address (dst, BLKmode, destreg);
25114 set_mem_align (dst, desired_align * BITS_PER_UNIT);
25115 epilogue_size_needed = 0;
25116 if (need_zero_guard
25117 && min_size < (unsigned HOST_WIDE_INT) size_needed)
25119 /* It is possible that we copied enough so the main loop will not
25120 execute. */
25121 gcc_assert (size_needed > 1);
25122 if (jump_around_label == NULL_RTX)
25123 jump_around_label = gen_label_rtx ();
25124 emit_cmp_and_jump_insns (count_exp,
25125 GEN_INT (size_needed),
25126 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25127 if (expected_size == -1
25128 || expected_size < (desired_align - align) / 2 + size_needed)
25129 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25130 else
25131 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25134 /* Ensure that the alignment prologue won't copy past the end of the block. */
25135 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25137 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25138 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25139 Make sure it is power of 2. */
25140 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
25142 /* To improve performance for small blocks, we jump around the VAL
25143 promoting code. This means that if the promoted VAL is not constant,
25144 we might not use it in the epilogue and have to use the byte
25145 loop variant. */
25146 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25147 force_loopy_epilogue = true;
25148 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25149 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25151 /* If the main algorithm works on QImode, no epilogue is needed.
25152 For small sizes just don't align anything. */
25153 if (size_needed == 1)
25154 desired_align = align;
25155 else
25156 goto epilogue;
25158 else if (!count
25159 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25161 label = gen_label_rtx ();
25162 emit_cmp_and_jump_insns (count_exp,
25163 GEN_INT (epilogue_size_needed),
25164 LTU, 0, counter_mode (count_exp), 1, label);
25165 if (expected_size == -1 || expected_size < epilogue_size_needed)
25166 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25167 else
25168 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25172 /* Emit code to decide at runtime whether a library call or inline code
25173 should be used. */
25174 if (dynamic_check != -1)
25176 if (!issetmem && CONST_INT_P (count_exp))
25178 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25180 emit_block_move_via_libcall (dst, src, count_exp, false);
25181 count_exp = const0_rtx;
25182 goto epilogue;
25185 else
25187 rtx_code_label *hot_label = gen_label_rtx ();
25188 if (jump_around_label == NULL_RTX)
25189 jump_around_label = gen_label_rtx ();
25190 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25191 LEU, 0, counter_mode (count_exp),
25192 1, hot_label);
25193 predict_jump (REG_BR_PROB_BASE * 90 / 100);
25194 if (issetmem)
25195 set_storage_via_libcall (dst, count_exp, val_exp, false);
25196 else
25197 emit_block_move_via_libcall (dst, src, count_exp, false);
25198 emit_jump (jump_around_label);
25199 emit_label (hot_label);
25203 /* Step 2: Alignment prologue. */
25204 /* Do the expensive promotion once we branched off the small blocks. */
25205 if (issetmem && !promoted_val)
25206 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25207 desired_align, align);
25209 if (desired_align > align && !misaligned_prologue_used)
25211 if (align_bytes == 0)
25213 /* Except for the first move in the prologue, we no longer know
25214 the constant offset in the aliasing info. It doesn't seem worth
25215 the pain to maintain it for the first move, so throw away
25216 the info early. */
25217 dst = change_address (dst, BLKmode, destreg);
25218 if (!issetmem)
25219 src = change_address (src, BLKmode, srcreg);
25220 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25221 promoted_val, vec_promoted_val,
25222 count_exp, align, desired_align,
25223 issetmem);
25224 /* At most desired_align - align bytes are copied. */
25225 if (min_size < (unsigned)(desired_align - align))
25226 min_size = 0;
25227 else
25228 min_size -= desired_align - align;
25230 else
25232 /* If we know how many bytes need to be stored before dst is
25233 sufficiently aligned, maintain aliasing info accurately. */
25234 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25235 srcreg,
25236 promoted_val,
25237 vec_promoted_val,
25238 desired_align,
25239 align_bytes,
25240 issetmem);
25242 count_exp = plus_constant (counter_mode (count_exp),
25243 count_exp, -align_bytes);
25244 count -= align_bytes;
25245 min_size -= align_bytes;
25246 max_size -= align_bytes;
25248 if (need_zero_guard
25249 && min_size < (unsigned HOST_WIDE_INT) size_needed
25250 && (count < (unsigned HOST_WIDE_INT) size_needed
25251 || (align_bytes == 0
25252 && count < ((unsigned HOST_WIDE_INT) size_needed
25253 + desired_align - align))))
25255 /* It is possible that we copied enough so the main loop will not
25256 execute. */
25257 gcc_assert (size_needed > 1);
25258 if (label == NULL_RTX)
25259 label = gen_label_rtx ();
25260 emit_cmp_and_jump_insns (count_exp,
25261 GEN_INT (size_needed),
25262 LTU, 0, counter_mode (count_exp), 1, label);
25263 if (expected_size == -1
25264 || expected_size < (desired_align - align) / 2 + size_needed)
25265 predict_jump (REG_BR_PROB_BASE * 20 / 100);
25266 else
25267 predict_jump (REG_BR_PROB_BASE * 60 / 100);
25270 if (label && size_needed == 1)
25272 emit_label (label);
25273 LABEL_NUSES (label) = 1;
25274 label = NULL;
25275 epilogue_size_needed = 1;
25276 if (issetmem)
25277 promoted_val = val_exp;
25279 else if (label == NULL_RTX && !misaligned_prologue_used)
25280 epilogue_size_needed = size_needed;
25282 /* Step 3: Main loop. */
25284 switch (alg)
25286 case libcall:
25287 case no_stringop:
25288 case last_alg:
25289 gcc_unreachable ();
25290 case loop_1_byte:
25291 case loop:
25292 case unrolled_loop:
25293 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25294 count_exp, move_mode, unroll_factor,
25295 expected_size, issetmem);
25296 break;
25297 case vector_loop:
25298 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25299 vec_promoted_val, count_exp, move_mode,
25300 unroll_factor, expected_size, issetmem);
25301 break;
25302 case rep_prefix_8_byte:
25303 case rep_prefix_4_byte:
25304 case rep_prefix_1_byte:
25305 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25306 val_exp, count_exp, move_mode, issetmem);
25307 break;
25309 /* Properly adjust the offset of src and dest memory for aliasing. */
25310 if (CONST_INT_P (count_exp))
25312 if (!issetmem)
25313 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25314 (count / size_needed) * size_needed);
25315 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25316 (count / size_needed) * size_needed);
25318 else
25320 if (!issetmem)
25321 src = change_address (src, BLKmode, srcreg);
25322 dst = change_address (dst, BLKmode, destreg);
25325 /* Step 4: Epilogue to copy the remaining bytes. */
25326 epilogue:
25327 if (label)
25329 /* When the main loop is done, COUNT_EXP might hold the original count,
25330 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
25331 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
25332 bytes. Compensate if needed. */
25334 if (size_needed < epilogue_size_needed)
25336 tmp =
25337 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25338 GEN_INT (size_needed - 1), count_exp, 1,
25339 OPTAB_DIRECT);
25340 if (tmp != count_exp)
25341 emit_move_insn (count_exp, tmp);
25343 emit_label (label);
25344 LABEL_NUSES (label) = 1;
25347 if (count_exp != const0_rtx && epilogue_size_needed > 1)
25349 if (force_loopy_epilogue)
25350 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25351 epilogue_size_needed);
25352 else
25354 if (issetmem)
25355 expand_setmem_epilogue (dst, destreg, promoted_val,
25356 vec_promoted_val, count_exp,
25357 epilogue_size_needed);
25358 else
25359 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25360 epilogue_size_needed);
25363 if (jump_around_label)
25364 emit_label (jump_around_label);
25365 return true;
25369 /* Expand the appropriate insns for doing strlen if not just doing
25370 repnz; scasb
25372 out = result, initialized with the start address
25373 align_rtx = alignment of the address.
25374 scratch = scratch register, initialized with the start address when
25375 not aligned, otherwise undefined
25377 This is just the body. It needs the initializations mentioned above and
25378 some address computing at the end. These things are done in i386.md. */
25380 static void
25381 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25383 int align;
25384 rtx tmp;
25385 rtx_code_label *align_2_label = NULL;
25386 rtx_code_label *align_3_label = NULL;
25387 rtx_code_label *align_4_label = gen_label_rtx ();
25388 rtx_code_label *end_0_label = gen_label_rtx ();
25389 rtx mem;
25390 rtx tmpreg = gen_reg_rtx (SImode);
25391 rtx scratch = gen_reg_rtx (SImode);
25392 rtx cmp;
25394 align = 0;
25395 if (CONST_INT_P (align_rtx))
25396 align = INTVAL (align_rtx);
25398 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
25400 /* Is there a known alignment and is it less than 4? */
25401 if (align < 4)
25403 rtx scratch1 = gen_reg_rtx (Pmode);
25404 emit_move_insn (scratch1, out);
25405 /* Is there a known alignment and is it not 2? */
25406 if (align != 2)
25408 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25409 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25411 /* Leave just the 2 lower bits. */
25412 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25413 NULL_RTX, 0, OPTAB_WIDEN);
25415 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25416 Pmode, 1, align_4_label);
25417 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25418 Pmode, 1, align_2_label);
25419 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25420 Pmode, 1, align_3_label);
25422 else
25424 /* Since the alignment is 2, we have to check 2 or 0 bytes;
25425 check if it is aligned to 4 bytes. */
25427 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25428 NULL_RTX, 0, OPTAB_WIDEN);
25430 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25431 Pmode, 1, align_4_label);
25434 mem = change_address (src, QImode, out);
25436 /* Now compare the bytes. */
25438 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
25439 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25440 QImode, 1, end_0_label);
25442 /* Increment the address. */
25443 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25445 /* Not needed with an alignment of 2 */
25446 if (align != 2)
25448 emit_label (align_2_label);
25450 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25451 end_0_label);
25453 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25455 emit_label (align_3_label);
25458 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25459 end_0_label);
25461 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25464 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
25465 align this loop; it only makes the program larger and does not help
25466 speed. */
25467 emit_label (align_4_label);
25469 mem = change_address (src, SImode, out);
25470 emit_move_insn (scratch, mem);
25471 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25473 /* This formula yields a nonzero result iff one of the bytes is zero.
25474 This saves three branches inside the loop and many cycles. */
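/* Concretely, the sequence below computes (x - 0x01010101) & ~x & 0x80808080.
   For x = 0x12003456 this is 0x10FF3355 & 0xEDFFCBA9 & 0x80808080
   = 0x00800000, flagging the zero byte, while for x = 0x12345678 the result
   is 0 and the loop continues.  */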
25476 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25477 emit_insn (gen_one_cmplsi2 (scratch, scratch));
25478 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25479 emit_insn (gen_andsi3 (tmpreg, tmpreg,
25480 gen_int_mode (0x80808080, SImode)));
25481 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25482 align_4_label);
25484 if (TARGET_CMOVE)
25486 rtx reg = gen_reg_rtx (SImode);
25487 rtx reg2 = gen_reg_rtx (Pmode);
25488 emit_move_insn (reg, tmpreg);
25489 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25491 /* If zero is not in the first two bytes, move two bytes forward. */
25492 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25493 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25494 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25495 emit_insn (gen_rtx_SET (tmpreg,
25496 gen_rtx_IF_THEN_ELSE (SImode, tmp,
25497 reg,
25498 tmpreg)));
25499 /* Emit lea manually to avoid clobbering of flags. */
25500 emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx)));
25502 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25503 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25504 emit_insn (gen_rtx_SET (out,
25505 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25506 reg2,
25507 out)));
25509 else
25511 rtx_code_label *end_2_label = gen_label_rtx ();
25512 /* Is zero in the first two bytes? */
25514 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25515 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25516 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25517 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25518 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25519 pc_rtx);
25520 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
25521 JUMP_LABEL (tmp) = end_2_label;
25523 /* Not in the first two. Move two bytes forward. */
25524 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25525 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25527 emit_label (end_2_label);
25531 /* Avoid a branch in fixing up the byte position. */
25532 tmpreg = gen_lowpart (QImode, tmpreg);
25533 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
25534 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25535 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25536 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
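/* The add above doubles TMPREG, setting the carry exactly when its low byte
   is 0x80, i.e. when the zero is the first byte of the remaining pair.  OUT,
   which points 4 (or, after the +2, 6) bytes past the start of the scanned
   word, is then adjusted by -3 - carry so that it lands on the zero byte
   without a branch.  */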
25538 emit_label (end_0_label);
25541 /* Expand strlen. */
25543 bool
25544 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25546 rtx addr, scratch1, scratch2, scratch3, scratch4;
25548 /* The generic case of the strlen expander is long. Avoid expanding it
25549 unless TARGET_INLINE_ALL_STRINGOPS. */
25551 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25552 && !TARGET_INLINE_ALL_STRINGOPS
25553 && !optimize_insn_for_size_p ()
25554 && (!CONST_INT_P (align) || INTVAL (align) < 4))
25555 return false;
25557 addr = force_reg (Pmode, XEXP (src, 0));
25558 scratch1 = gen_reg_rtx (Pmode);
25560 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25561 && !optimize_insn_for_size_p ())
25563 /* Well it seems that some optimizer does not combine a call like
25564 foo(strlen(bar), strlen(bar));
25565 when the move and the subtraction are done here. It does calculate
25566 the length just once when these instructions are done inside of
25567 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
25568 often used and I use one fewer register for the lifetime of
25569 output_strlen_unroll() this is better. */
25571 emit_move_insn (out, addr);
25573 ix86_expand_strlensi_unroll_1 (out, src, align);
25575 /* strlensi_unroll_1 returns the address of the zero at the end of
25576 the string, like memchr(), so compute the length by subtracting
25577 the start address. */
25578 emit_insn (ix86_gen_sub3 (out, out, addr));
25580 else
25582 rtx unspec;
25584 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25585 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25586 return false;
25588 scratch2 = gen_reg_rtx (Pmode);
25589 scratch3 = gen_reg_rtx (Pmode);
25590 scratch4 = force_reg (Pmode, constm1_rtx);
25592 emit_move_insn (scratch3, addr);
25593 eoschar = force_reg (QImode, eoschar);
25595 src = replace_equiv_address_nv (src, scratch3);
25597 /* If .md starts supporting :P, this can be done in .md. */
25598 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25599 scratch4), UNSPEC_SCAS);
25600 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25601 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25602 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
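/* SCRATCH4 (loaded with -1 above) is the initial scan count for repnz scasb;
   the strlenqi_1 pattern in i386.md binds it to the count register.  The
   scan decrements it once per byte examined, including the terminating zero,
   so it ends up holding -(length + 2); hence length = ~count - 1, which is
   exactly the one's complement and add of -1 emitted above.  */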
25604 return true;
25607 /* For a given symbol (function) construct code to compute the address of its
25608 PLT entry in the large x86-64 PIC model. */
25609 static rtx
25610 construct_plt_address (rtx symbol)
25612 rtx tmp, unspec;
25614 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25615 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25616 gcc_assert (Pmode == DImode);
25618 tmp = gen_reg_rtx (Pmode);
25619 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25621 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25622 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25623 return tmp;
25627 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25628 rtx callarg2,
25629 rtx pop, bool sibcall)
25631 rtx vec[3];
25632 rtx use = NULL, call;
25633 unsigned int vec_len = 0;
25635 if (pop == const0_rtx)
25636 pop = NULL;
25637 gcc_assert (!TARGET_64BIT || !pop);
25639 if (TARGET_MACHO && !TARGET_64BIT)
25641 #if TARGET_MACHO
25642 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25643 fnaddr = machopic_indirect_call_target (fnaddr);
25644 #endif
25646 else
25648 /* Static functions and indirect calls don't need the pic register. Also,
25649 check if the PLT was explicitly avoided via -fno-plt or the "noplt" attribute,
25650 making it an indirect call. */
25651 if (flag_pic
25652 && (!TARGET_64BIT
25653 || (ix86_cmodel == CM_LARGE_PIC
25654 && DEFAULT_ABI != MS_ABI))
25655 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25656 && !SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))
25657 && flag_plt
25658 && (SYMBOL_REF_DECL ((XEXP (fnaddr, 0))) == NULL_TREE
25659 || !lookup_attribute ("noplt",
25660 DECL_ATTRIBUTES (SYMBOL_REF_DECL (XEXP (fnaddr, 0))))))
25662 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25663 if (ix86_use_pseudo_pic_reg ())
25664 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25665 pic_offset_table_rtx);
25669 /* Skip setting up RAX register for -mskip-rax-setup when there are no
25670 parameters passed in vector registers. */
25671 if (TARGET_64BIT
25672 && (INTVAL (callarg2) > 0
25673 || (INTVAL (callarg2) == 0
25674 && (TARGET_SSE || !flag_skip_rax_setup))))
25676 rtx al = gen_rtx_REG (QImode, AX_REG);
25677 emit_move_insn (al, callarg2);
25678 use_reg (&use, al);
25681 if (ix86_cmodel == CM_LARGE_PIC
25682 && !TARGET_PECOFF
25683 && MEM_P (fnaddr)
25684 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25685 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25686 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25687 else if (sibcall
25688 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25689 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25691 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25692 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25695 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25697 if (retval)
25699 /* We should add bounds as a destination register in case
25700 a pointer with bounds may be returned. */
25701 if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25703 rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25704 rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25705 if (GET_CODE (retval) == PARALLEL)
25707 b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25708 b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25709 rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25710 retval = chkp_join_splitted_slot (retval, par);
25712 else
25714 retval = gen_rtx_PARALLEL (VOIDmode,
25715 gen_rtvec (3, retval, b0, b1));
25716 chkp_put_regs_to_expr_list (retval);
25720 call = gen_rtx_SET (retval, call);
25722 vec[vec_len++] = call;
25724 if (pop)
25726 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25727 pop = gen_rtx_SET (stack_pointer_rtx, pop);
25728 vec[vec_len++] = pop;
25731 if (TARGET_64BIT_MS_ABI
25732 && (!callarg2 || INTVAL (callarg2) != -2))
25734 int const cregs_size
25735 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25736 int i;
25738 for (i = 0; i < cregs_size; i++)
25740 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25741 machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25743 clobber_reg (&use, gen_rtx_REG (mode, regno));
25747 if (vec_len > 1)
25748 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25749 call = emit_call_insn (call);
25750 if (use)
25751 CALL_INSN_FUNCTION_USAGE (call) = use;
25753 return call;
25756 /* Return true if the function being called was marked with attribute "noplt"
25757 or using -fno-plt and we are compiling for non-PIC and x86_64. We need to
25758 handle the non-PIC case in the backend because there is no easy interface
25759 for the front-end to force non-PLT calls to use the GOT. This is currently
25760 used only with 64-bit ELF targets to call the function marked "noplt"
25761 indirectly. */
25763 static bool
25764 ix86_nopic_noplt_attribute_p (rtx call_op)
25766 if (flag_pic || ix86_cmodel == CM_LARGE
25767 || !TARGET_64BIT || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
25768 || SYMBOL_REF_LOCAL_P (call_op))
25769 return false;
25771 tree symbol_decl = SYMBOL_REF_DECL (call_op);
25773 if (!flag_plt
25774 || (symbol_decl != NULL_TREE
25775 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
25776 return true;
25778 return false;
25781 /* Output the assembly for a call instruction. */
25783 const char *
25784 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25786 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25787 bool seh_nop_p = false;
25788 const char *xasm;
25790 if (SIBLING_CALL_P (insn))
25792 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25793 xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
25794 else if (direct_p)
25795 xasm = "%!jmp\t%P0";
25796 /* SEH epilogue detection requires the indirect branch case
25797 to include REX.W. */
25798 else if (TARGET_SEH)
25799 xasm = "%!rex.W jmp %A0";
25800 else
25801 xasm = "%!jmp\t%A0";
25803 output_asm_insn (xasm, &call_op);
25804 return "";
25807 /* SEH unwinding can require an extra nop to be emitted in several
25808 circumstances. Determine if we have one of those. */
25809 if (TARGET_SEH)
25811 rtx_insn *i;
25813 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25815 /* If we get to another real insn, we don't need the nop. */
25816 if (INSN_P (i))
25817 break;
25819 /* If we get to the epilogue note, prevent a catch region from
25820 being adjacent to the standard epilogue sequence. If non-
25821 call-exceptions, we'll have done this during epilogue emission. */
25822 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25823 && !flag_non_call_exceptions
25824 && !can_throw_internal (insn))
25826 seh_nop_p = true;
25827 break;
25831 /* If we didn't find a real insn following the call, prevent the
25832 unwinder from looking into the next function. */
25833 if (i == NULL)
25834 seh_nop_p = true;
25837 if (direct_p && ix86_nopic_noplt_attribute_p (call_op))
25838 xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
25839 else if (direct_p)
25840 xasm = "%!call\t%P0";
25841 else
25842 xasm = "%!call\t%A0";
25844 output_asm_insn (xasm, &call_op);
25846 if (seh_nop_p)
25847 return "nop";
25849 return "";
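/* So an ordinary direct call is emitted as call SYM (jmp SYM for a sibcall),
   while a callee that ix86_nopic_noplt_attribute_p says must bypass the PLT
   becomes call *SYM@GOTPCREL(%rip); the %! sequence expands to the bnd
   prefix when MPX instrumentation is active, and the extra nop is returned
   only where SEH unwinding requires it.  */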
25852 /* Clear stack slot assignments remembered from previous functions.
25853 This is called from INIT_EXPANDERS once before RTL is emitted for each
25854 function. */
25856 static struct machine_function *
25857 ix86_init_machine_status (void)
25859 struct machine_function *f;
25861 f = ggc_cleared_alloc<machine_function> ();
25862 f->use_fast_prologue_epilogue_nregs = -1;
25863 f->call_abi = ix86_abi;
25865 return f;
25868 /* Return a MEM corresponding to a stack slot with mode MODE.
25869 Allocate a new slot if necessary.
25871 The RTL for a function can have several slots available: N is
25872 which slot to use. */
25875 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25877 struct stack_local_entry *s;
25879 gcc_assert (n < MAX_386_STACK_LOCALS);
25881 for (s = ix86_stack_locals; s; s = s->next)
25882 if (s->mode == mode && s->n == n)
25883 return validize_mem (copy_rtx (s->rtl));
25885 s = ggc_alloc<stack_local_entry> ();
25886 s->n = n;
25887 s->mode = mode;
25888 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25890 s->next = ix86_stack_locals;
25891 ix86_stack_locals = s;
25892 return validize_mem (copy_rtx (s->rtl));
25895 static void
25896 ix86_instantiate_decls (void)
25898 struct stack_local_entry *s;
25900 for (s = ix86_stack_locals; s; s = s->next)
25901 if (s->rtl != NULL_RTX)
25902 instantiate_decl_rtl (s->rtl);
25905 /* Check whether x86 address PARTS is a pc-relative address. */
25907 static bool
25908 rip_relative_addr_p (struct ix86_address *parts)
25910 rtx base, index, disp;
25912 base = parts->base;
25913 index = parts->index;
25914 disp = parts->disp;
25916 if (disp && !base && !index)
25918 if (TARGET_64BIT)
25920 rtx symbol = disp;
25922 if (GET_CODE (disp) == CONST)
25923 symbol = XEXP (disp, 0);
25924 if (GET_CODE (symbol) == PLUS
25925 && CONST_INT_P (XEXP (symbol, 1)))
25926 symbol = XEXP (symbol, 0);
25928 if (GET_CODE (symbol) == LABEL_REF
25929 || (GET_CODE (symbol) == SYMBOL_REF
25930 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25931 || (GET_CODE (symbol) == UNSPEC
25932 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25933 || XINT (symbol, 1) == UNSPEC_PCREL
25934 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25935 return true;
25938 return false;
25941 /* Calculate the length of the memory address in the instruction encoding.
25942 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25943 or other prefixes. We never generate addr32 prefix for LEA insn. */
25946 memory_address_length (rtx addr, bool lea)
25948 struct ix86_address parts;
25949 rtx base, index, disp;
25950 int len;
25951 int ok;
25953 if (GET_CODE (addr) == PRE_DEC
25954 || GET_CODE (addr) == POST_INC
25955 || GET_CODE (addr) == PRE_MODIFY
25956 || GET_CODE (addr) == POST_MODIFY)
25957 return 0;
25959 ok = ix86_decompose_address (addr, &parts);
25960 gcc_assert (ok);
25962 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25964 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25965 if (TARGET_64BIT && !lea
25966 && (SImode_address_operand (addr, VOIDmode)
25967 || (parts.base && GET_MODE (parts.base) == SImode)
25968 || (parts.index && GET_MODE (parts.index) == SImode)))
25969 len++;
25971 base = parts.base;
25972 index = parts.index;
25973 disp = parts.disp;
25975 if (base && GET_CODE (base) == SUBREG)
25976 base = SUBREG_REG (base);
25977 if (index && GET_CODE (index) == SUBREG)
25978 index = SUBREG_REG (index);
25980 gcc_assert (base == NULL_RTX || REG_P (base));
25981 gcc_assert (index == NULL_RTX || REG_P (index));
25983 /* Rule of thumb:
25984 - esp as the base always wants an index,
25985 - ebp as the base always wants a displacement,
25986 - r12 as the base always wants an index,
25987 - r13 as the base always wants a displacement. */
25989 /* Register Indirect. */
25990 if (base && !index && !disp)
25992 /* esp (for its index) and ebp (for its displacement) need
25993 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25994 code. */
25995 if (base == arg_pointer_rtx
25996 || base == frame_pointer_rtx
25997 || REGNO (base) == SP_REG
25998 || REGNO (base) == BP_REG
25999 || REGNO (base) == R12_REG
26000 || REGNO (base) == R13_REG)
26001 len++;
26004 /* Direct addressing. In 64-bit mode, mod=00 r/m=5
26005 is not disp32 but disp32(%rip), so for disp32 a
26006 SIB byte is needed, unless print_operand_address
26007 optimizes it into disp32(%rip) or (%rip) is implied
26008 by an UNSPEC. */
26009 else if (disp && !base && !index)
26011 len += 4;
26012 if (rip_relative_addr_p (&parts))
26013 len++;
26015 else
26017 /* Find the length of the displacement constant. */
26018 if (disp)
26020 if (base && satisfies_constraint_K (disp))
26021 len += 1;
26022 else
26023 len += 4;
26025 /* ebp always wants a displacement. Similarly r13. */
26026 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
26027 len++;
26029 /* An index requires the two-byte modrm form.... */
26030 if (index
26031 /* ...like esp (or r12), which always wants an index. */
26032 || base == arg_pointer_rtx
26033 || base == frame_pointer_rtx
26034 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
26035 len++;
26038 return len;
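/* A few worked cases: (%rax) costs nothing beyond the modrm byte counted
   elsewhere, (%rsp) and (%r12) cost one extra byte for the SIB byte, (%rbp)
   and (%r13) cost one byte for the mandatory disp8, 8(%rax) costs one byte
   of disp8 while 300(%rax) costs four bytes of disp32, and any index
   register adds the SIB byte on top.  */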
26041 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
26042 is set, expect that the insn has an 8-bit immediate alternative. */
26044 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
26046 int len = 0;
26047 int i;
26048 extract_insn_cached (insn);
26049 for (i = recog_data.n_operands - 1; i >= 0; --i)
26050 if (CONSTANT_P (recog_data.operand[i]))
26052 enum attr_mode mode = get_attr_mode (insn);
26054 gcc_assert (!len);
26055 if (shortform && CONST_INT_P (recog_data.operand[i]))
26057 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
26058 switch (mode)
26060 case MODE_QI:
26061 len = 1;
26062 continue;
26063 case MODE_HI:
26064 ival = trunc_int_for_mode (ival, HImode);
26065 break;
26066 case MODE_SI:
26067 ival = trunc_int_for_mode (ival, SImode);
26068 break;
26069 default:
26070 break;
26072 if (IN_RANGE (ival, -128, 127))
26074 len = 1;
26075 continue;
26078 switch (mode)
26080 case MODE_QI:
26081 len = 1;
26082 break;
26083 case MODE_HI:
26084 len = 2;
26085 break;
26086 case MODE_SI:
26087 len = 4;
26088 break;
26089 /* Immediates for DImode instructions are encoded
26090 as 32-bit sign-extended values. */
26091 case MODE_DI:
26092 len = 4;
26093 break;
26094 default:
26095 fatal_insn ("unknown insn mode", insn);
26098 return len;
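/* For example, with SHORTFORM an SImode insn with the constant 5 counts as
   a 1-byte immediate while the constant 300 counts as 4 bytes; HImode
   immediates count as 2 bytes and DImode immediates as 4, since they are
   encoded as sign-extended 32-bit values.  */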
26101 /* Compute default value for "length_address" attribute. */
26103 ix86_attr_length_address_default (rtx_insn *insn)
26105 int i;
26107 if (get_attr_type (insn) == TYPE_LEA)
26109 rtx set = PATTERN (insn), addr;
26111 if (GET_CODE (set) == PARALLEL)
26112 set = XVECEXP (set, 0, 0);
26114 gcc_assert (GET_CODE (set) == SET);
26116 addr = SET_SRC (set);
26118 return memory_address_length (addr, true);
26121 extract_insn_cached (insn);
26122 for (i = recog_data.n_operands - 1; i >= 0; --i)
26123 if (MEM_P (recog_data.operand[i]))
26125 constrain_operands_cached (insn, reload_completed);
26126 if (which_alternative != -1)
26128 const char *constraints = recog_data.constraints[i];
26129 int alt = which_alternative;
26131 while (*constraints == '=' || *constraints == '+')
26132 constraints++;
26133 while (alt-- > 0)
26134 while (*constraints++ != ',')
26136 /* Skip ignored operands. */
26137 if (*constraints == 'X')
26138 continue;
26140 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26142 return 0;
26145 /* Compute the default value for the "length_vex" attribute. It includes
26146 the 2- or 3-byte VEX prefix and 1 opcode byte. */
26149 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26150 bool has_vex_w)
26152 int i;
26154 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit
26155 requires the 3-byte VEX prefix. */
26156 if (!has_0f_opcode || has_vex_w)
26157 return 3 + 1;
26159 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
26160 if (!TARGET_64BIT)
26161 return 2 + 1;
26163 extract_insn_cached (insn);
26165 for (i = recog_data.n_operands - 1; i >= 0; --i)
26166 if (REG_P (recog_data.operand[i]))
26168 /* REX.W bit uses 3 byte VEX prefix. */
26169 if (GET_MODE (recog_data.operand[i]) == DImode
26170 && GENERAL_REG_P (recog_data.operand[i]))
26171 return 3 + 1;
26173 else
26175 /* REX.X or REX.B bits use 3 byte VEX prefix. */
26176 if (MEM_P (recog_data.operand[i])
26177 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26178 return 3 + 1;
26181 return 2 + 1;
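/* E.g. a VEX insn in the 0f opcode map whose operands need neither REX.W
   nor an extended register in a memory address is counted as 2 + 1 = 3
   bytes of prefix plus opcode, while a DImode general-register operand or a
   memory operand mentioning r8-r15 or xmm8-xmm15 pushes it to 3 + 1 = 4.  */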
26184 /* Return the maximum number of instructions a CPU can issue per cycle. */
26186 static int
26187 ix86_issue_rate (void)
26189 switch (ix86_tune)
26191 case PROCESSOR_PENTIUM:
26192 case PROCESSOR_IAMCU:
26193 case PROCESSOR_BONNELL:
26194 case PROCESSOR_SILVERMONT:
26195 case PROCESSOR_KNL:
26196 case PROCESSOR_INTEL:
26197 case PROCESSOR_K6:
26198 case PROCESSOR_BTVER2:
26199 case PROCESSOR_PENTIUM4:
26200 case PROCESSOR_NOCONA:
26201 return 2;
26203 case PROCESSOR_PENTIUMPRO:
26204 case PROCESSOR_ATHLON:
26205 case PROCESSOR_K8:
26206 case PROCESSOR_AMDFAM10:
26207 case PROCESSOR_GENERIC:
26208 case PROCESSOR_BTVER1:
26209 return 3;
26211 case PROCESSOR_BDVER1:
26212 case PROCESSOR_BDVER2:
26213 case PROCESSOR_BDVER3:
26214 case PROCESSOR_BDVER4:
26215 case PROCESSOR_CORE2:
26216 case PROCESSOR_NEHALEM:
26217 case PROCESSOR_SANDYBRIDGE:
26218 case PROCESSOR_HASWELL:
26219 return 4;
26221 default:
26222 return 1;
26226 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
26227 by DEP_INSN and nothing else set by DEP_INSN. */
26229 static bool
26230 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26232 rtx set, set2;
26234 /* Simplify the test for uninteresting insns. */
26235 if (insn_type != TYPE_SETCC
26236 && insn_type != TYPE_ICMOV
26237 && insn_type != TYPE_FCMOV
26238 && insn_type != TYPE_IBR)
26239 return false;
26241 if ((set = single_set (dep_insn)) != 0)
26243 set = SET_DEST (set);
26244 set2 = NULL_RTX;
26246 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26247 && XVECLEN (PATTERN (dep_insn), 0) == 2
26248 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26249 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26251 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
26252 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26254 else
26255 return false;
26257 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26258 return false;
26260 /* This test is true if the dependent insn reads the flags but
26261 not any other potentially set register. */
26262 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26263 return false;
26265 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26266 return false;
26268 return true;
26271 /* Return true iff USE_INSN has a memory address with operands set by
26272 SET_INSN. */
26274 bool
26275 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26277 int i;
26278 extract_insn_cached (use_insn);
26279 for (i = recog_data.n_operands - 1; i >= 0; --i)
26280 if (MEM_P (recog_data.operand[i]))
26282 rtx addr = XEXP (recog_data.operand[i], 0);
26283 return modified_in_p (addr, set_insn) != 0;
26285 return false;
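/* E.g. on Pentium a load from 4(%eax) immediately following an insn that
   writes %eax has such a dependency and is charged an extra cycle by
   ix86_adjust_cost below.  */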
26288 /* Helper function for exact_store_load_dependency.
26289 Return true if ADDR is found in INSN. */
26290 static bool
26291 exact_dependency_1 (rtx addr, rtx insn)
26293 enum rtx_code code;
26294 const char *format_ptr;
26295 int i, j;
26297 code = GET_CODE (insn);
26298 switch (code)
26300 case MEM:
26301 if (rtx_equal_p (addr, insn))
26302 return true;
26303 break;
26304 case REG:
26305 CASE_CONST_ANY:
26306 case SYMBOL_REF:
26307 case CODE_LABEL:
26308 case PC:
26309 case CC0:
26310 case EXPR_LIST:
26311 return false;
26312 default:
26313 break;
26316 format_ptr = GET_RTX_FORMAT (code);
26317 for (i = 0; i < GET_RTX_LENGTH (code); i++)
26319 switch (*format_ptr++)
26321 case 'e':
26322 if (exact_dependency_1 (addr, XEXP (insn, i)))
26323 return true;
26324 break;
26325 case 'E':
26326 for (j = 0; j < XVECLEN (insn, i); j++)
26327 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26328 return true;
26329 break;
26332 return false;
26335 /* Return true if there exists an exact dependency between the store and the
26336 load, i.e. the same memory address is used in both. */
26337 static bool
26338 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26340 rtx set1, set2;
26342 set1 = single_set (store);
26343 if (!set1)
26344 return false;
26345 if (!MEM_P (SET_DEST (set1)))
26346 return false;
26347 set2 = single_set (load);
26348 if (!set2)
26349 return false;
26350 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26351 return true;
26352 return false;
26355 static int
26356 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26358 enum attr_type insn_type, dep_insn_type;
26359 enum attr_memory memory;
26360 rtx set, set2;
26361 int dep_insn_code_number;
26363 /* Anti and output dependencies have zero cost on all CPUs. */
26364 if (REG_NOTE_KIND (link) != 0)
26365 return 0;
26367 dep_insn_code_number = recog_memoized (dep_insn);
26369 /* If we can't recognize the insns, we can't really do anything. */
26370 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26371 return cost;
26373 insn_type = get_attr_type (insn);
26374 dep_insn_type = get_attr_type (dep_insn);
26376 switch (ix86_tune)
26378 case PROCESSOR_PENTIUM:
26379 case PROCESSOR_IAMCU:
26380 /* Address Generation Interlock adds a cycle of latency. */
26381 if (insn_type == TYPE_LEA)
26383 rtx addr = PATTERN (insn);
26385 if (GET_CODE (addr) == PARALLEL)
26386 addr = XVECEXP (addr, 0, 0);
26388 gcc_assert (GET_CODE (addr) == SET);
26390 addr = SET_SRC (addr);
26391 if (modified_in_p (addr, dep_insn))
26392 cost += 1;
26394 else if (ix86_agi_dependent (dep_insn, insn))
26395 cost += 1;
26397 /* ??? Compares pair with jump/setcc. */
26398 if (ix86_flags_dependent (insn, dep_insn, insn_type))
26399 cost = 0;
26401 /* Floating point stores require value to be ready one cycle earlier. */
26402 if (insn_type == TYPE_FMOV
26403 && get_attr_memory (insn) == MEMORY_STORE
26404 && !ix86_agi_dependent (dep_insn, insn))
26405 cost += 1;
26406 break;
26408 case PROCESSOR_PENTIUMPRO:
26409 /* INT->FP conversion is expensive. */
26410 if (get_attr_fp_int_src (dep_insn))
26411 cost += 5;
26413 /* There is one cycle extra latency between an FP op and a store. */
26414 if (insn_type == TYPE_FMOV
26415 && (set = single_set (dep_insn)) != NULL_RTX
26416 && (set2 = single_set (insn)) != NULL_RTX
26417 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26418 && MEM_P (SET_DEST (set2)))
26419 cost += 1;
26421 memory = get_attr_memory (insn);
26423 /* Show the ability of the reorder buffer to hide the latency of a load
26424 by executing it in parallel with the previous instruction when the
26425 previous instruction is not needed to compute the address. */
26426 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26427 && !ix86_agi_dependent (dep_insn, insn))
26429 /* Claim moves to take one cycle, as the core can issue one load
26430 at a time and the next load can start a cycle later. */
26431 if (dep_insn_type == TYPE_IMOV
26432 || dep_insn_type == TYPE_FMOV)
26433 cost = 1;
26434 else if (cost > 1)
26435 cost--;
26437 break;
26439 case PROCESSOR_K6:
26440 /* The esp dependency is resolved before
26441 the instruction is really finished. */
26442 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26443 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26444 return 1;
26446 /* INT->FP conversion is expensive. */
26447 if (get_attr_fp_int_src (dep_insn))
26448 cost += 5;
26450 memory = get_attr_memory (insn);
26452 /* Show the ability of the reorder buffer to hide the latency of a load
26453 by executing it in parallel with the previous instruction when the
26454 previous instruction is not needed to compute the address. */
26455 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26456 && !ix86_agi_dependent (dep_insn, insn))
26458 /* Claim moves to take one cycle, as the core can issue one load
26459 at a time and the next load can start a cycle later. */
26460 if (dep_insn_type == TYPE_IMOV
26461 || dep_insn_type == TYPE_FMOV)
26462 cost = 1;
26463 else if (cost > 2)
26464 cost -= 2;
26465 else
26466 cost = 1;
26468 break;
26470 case PROCESSOR_AMDFAM10:
26471 case PROCESSOR_BDVER1:
26472 case PROCESSOR_BDVER2:
26473 case PROCESSOR_BDVER3:
26474 case PROCESSOR_BDVER4:
26475 case PROCESSOR_BTVER1:
26476 case PROCESSOR_BTVER2:
26477 case PROCESSOR_GENERIC:
26478 /* The stack engine allows push&pop instructions to execute in parallel. */
26479 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26480 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26481 return 0;
26482 /* FALLTHRU */
26484 case PROCESSOR_ATHLON:
26485 case PROCESSOR_K8:
26486 memory = get_attr_memory (insn);
26488 /* Show the ability of the reorder buffer to hide the latency of a load
26489 by executing it in parallel with the previous instruction when the
26490 previous instruction is not needed to compute the address. */
26491 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26492 && !ix86_agi_dependent (dep_insn, insn))
26494 enum attr_unit unit = get_attr_unit (insn);
26495 int loadcost = 3;
26497 /* Because of the difference between the length of integer and
26498 floating unit pipeline preparation stages, the memory operands
26499 for floating point are cheaper.
26501 ??? For Athlon the difference is most probably 2. */
26502 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26503 loadcost = 3;
26504 else
26505 loadcost = TARGET_ATHLON ? 2 : 0;
26507 if (cost >= loadcost)
26508 cost -= loadcost;
26509 else
26510 cost = 0;
26512 break;
26514 case PROCESSOR_CORE2:
26515 case PROCESSOR_NEHALEM:
26516 case PROCESSOR_SANDYBRIDGE:
26517 case PROCESSOR_HASWELL:
26518 /* The stack engine allows push&pop instructions to execute in parallel. */
26519 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26520 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26521 return 0;
26523 memory = get_attr_memory (insn);
26525 /* Show the ability of the reorder buffer to hide the latency of a load
26526 by executing it in parallel with the previous instruction when the
26527 previous instruction is not needed to compute the address. */
26528 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26529 && !ix86_agi_dependent (dep_insn, insn))
26531 if (cost >= 4)
26532 cost -= 4;
26533 else
26534 cost = 0;
26536 break;
26538 case PROCESSOR_SILVERMONT:
26539 case PROCESSOR_KNL:
26540 case PROCESSOR_INTEL:
26541 if (!reload_completed)
26542 return cost;
26544 /* Increase cost of integer loads. */
26545 memory = get_attr_memory (dep_insn);
26546 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26548 enum attr_unit unit = get_attr_unit (dep_insn);
26549 if (unit == UNIT_INTEGER && cost == 1)
26551 if (memory == MEMORY_LOAD)
26552 cost = 3;
26553 else
26555 /* Increase the cost of ld/st for short int types only
26556 because of the store-forwarding issue. */
26557 rtx set = single_set (dep_insn);
26558 if (set && (GET_MODE (SET_DEST (set)) == QImode
26559 || GET_MODE (SET_DEST (set)) == HImode))
26561 /* Increase the cost of the store/load pair if an exact
26562 dependence exists and INSN is a load. */
26563 enum attr_memory insn_memory = get_attr_memory (insn);
26564 if (insn_memory == MEMORY_LOAD
26565 && exact_store_load_dependency (dep_insn, insn))
26566 cost = 3;
26572 default:
26573 break;
26576 return cost;
26579 /* How many alternative schedules to try. This should be as wide as the
26580 scheduling freedom in the DFA, but no wider. Making this value too
26581 large results in extra work for the scheduler. */
26583 static int
26584 ia32_multipass_dfa_lookahead (void)
26586 switch (ix86_tune)
26588 case PROCESSOR_PENTIUM:
26589 case PROCESSOR_IAMCU:
26590 return 2;
26592 case PROCESSOR_PENTIUMPRO:
26593 case PROCESSOR_K6:
26594 return 1;
26596 case PROCESSOR_BDVER1:
26597 case PROCESSOR_BDVER2:
26598 case PROCESSOR_BDVER3:
26599 case PROCESSOR_BDVER4:
26600 /* We use lookahead value 4 for BD both before and after reload
26601 scheduling. The plan is to use value 8 for -O3. */
26602 return 4;
26604 case PROCESSOR_CORE2:
26605 case PROCESSOR_NEHALEM:
26606 case PROCESSOR_SANDYBRIDGE:
26607 case PROCESSOR_HASWELL:
26608 case PROCESSOR_BONNELL:
26609 case PROCESSOR_SILVERMONT:
26610 case PROCESSOR_KNL:
26611 case PROCESSOR_INTEL:
26612 /* Generally, we want haifa-sched:max_issue() to look ahead as far
26613 as the number of instructions that can be executed in a cycle, i.e.,
26614 issue_rate. I wonder why tuning for many CPUs does not do this. */
26615 if (reload_completed)
26616 return ix86_issue_rate ();
26617 /* Don't use lookahead for pre-reload schedule to save compile time. */
26618 return 0;
26620 default:
26621 return 0;
26625 /* Return true if target platform supports macro-fusion. */
26627 static bool
26628 ix86_macro_fusion_p ()
26630 return TARGET_FUSE_CMP_AND_BRANCH;
26633 /* Check whether the current microarchitecture supports macro fusion
26634 for insn pair "CONDGEN + CONDJMP". Refer to
26635 "Intel Architectures Optimization Reference Manual". */
26637 static bool
26638 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26640 rtx src, dest;
26641 enum rtx_code ccode;
26642 rtx compare_set = NULL_RTX, test_if, cond;
26643 rtx alu_set = NULL_RTX, addr = NULL_RTX;
26645 if (!any_condjump_p (condjmp))
26646 return false;
26648 if (get_attr_type (condgen) != TYPE_TEST
26649 && get_attr_type (condgen) != TYPE_ICMP
26650 && get_attr_type (condgen) != TYPE_INCDEC
26651 && get_attr_type (condgen) != TYPE_ALU)
26652 return false;
26654 compare_set = single_set (condgen);
26655 if (compare_set == NULL_RTX
26656 && !TARGET_FUSE_ALU_AND_BRANCH)
26657 return false;
26659 if (compare_set == NULL_RTX)
26661 int i;
26662 rtx pat = PATTERN (condgen);
26663 for (i = 0; i < XVECLEN (pat, 0); i++)
26664 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26666 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26667 if (GET_CODE (set_src) == COMPARE)
26668 compare_set = XVECEXP (pat, 0, i);
26669 else
26670 alu_set = XVECEXP (pat, 0, i);
26673 if (compare_set == NULL_RTX)
26674 return false;
26675 src = SET_SRC (compare_set);
26676 if (GET_CODE (src) != COMPARE)
26677 return false;
26679 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26680 supported. */
26681 if ((MEM_P (XEXP (src, 0))
26682 && CONST_INT_P (XEXP (src, 1)))
26683 || (MEM_P (XEXP (src, 1))
26684 && CONST_INT_P (XEXP (src, 0))))
26685 return false;
26687 /* No fusion for RIP-relative address. */
26688 if (MEM_P (XEXP (src, 0)))
26689 addr = XEXP (XEXP (src, 0), 0);
26690 else if (MEM_P (XEXP (src, 1)))
26691 addr = XEXP (XEXP (src, 1), 0);
26693 if (addr) {
26694 ix86_address parts;
26695 int ok = ix86_decompose_address (addr, &parts);
26696 gcc_assert (ok);
26698 if (rip_relative_addr_p (&parts))
26699 return false;
26702 test_if = SET_SRC (pc_set (condjmp));
26703 cond = XEXP (test_if, 0);
26704 ccode = GET_CODE (cond);
26705 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26706 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26707 && (ccode == GE
26708 || ccode == GT
26709 || ccode == LE
26710 || ccode == LT))
26711 return false;
26713 /* Return true for TYPE_TEST and TYPE_ICMP. */
26714 if (get_attr_type (condgen) == TYPE_TEST
26715 || get_attr_type (condgen) == TYPE_ICMP)
26716 return true;
26718 /* The following handles the case of macro-fusion for alu + jmp. */
26719 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26720 return false;
26722 /* No fusion for alu op with memory destination operand. */
26723 dest = SET_DEST (alu_set);
26724 if (MEM_P (dest))
26725 return false;
26727 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26728 supported. */
26729 if (get_attr_type (condgen) == TYPE_INCDEC
26730 && (ccode == GEU
26731 || ccode == GTU
26732 || ccode == LEU
26733 || ccode == LTU))
26734 return false;
26736 return true;
26739 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26740 execution. It is applied if
26741 (1) an IMUL instruction is at the top of the list;
26742 (2) there is exactly one producer of an independent IMUL instruction in
26743 the ready list.
26744 Return the index of the IMUL producer if it was found and -1 otherwise. */
26745 static int
26746 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26748 rtx_insn *insn;
26749 rtx set, insn1, insn2;
26750 sd_iterator_def sd_it;
26751 dep_t dep;
26752 int index = -1;
26753 int i;
26755 if (!TARGET_BONNELL)
26756 return index;
26758 /* Check that IMUL instruction is on the top of ready list. */
26759 insn = ready[n_ready - 1];
26760 set = single_set (insn);
26761 if (!set)
26762 return index;
26763 if (!(GET_CODE (SET_SRC (set)) == MULT
26764 && GET_MODE (SET_SRC (set)) == SImode))
26765 return index;
26767 /* Search for producer of independent IMUL instruction. */
26768 for (i = n_ready - 2; i >= 0; i--)
26770 insn = ready[i];
26771 if (!NONDEBUG_INSN_P (insn))
26772 continue;
26773 /* Skip IMUL instruction. */
26774 insn2 = PATTERN (insn);
26775 if (GET_CODE (insn2) == PARALLEL)
26776 insn2 = XVECEXP (insn2, 0, 0);
26777 if (GET_CODE (insn2) == SET
26778 && GET_CODE (SET_SRC (insn2)) == MULT
26779 && GET_MODE (SET_SRC (insn2)) == SImode)
26780 continue;
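/* Look through the forward dependencies of INSN for a consumer that is an SImode IMUL.  */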
26782 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26784 rtx con;
26785 con = DEP_CON (dep);
26786 if (!NONDEBUG_INSN_P (con))
26787 continue;
26788 insn1 = PATTERN (con);
26789 if (GET_CODE (insn1) == PARALLEL)
26790 insn1 = XVECEXP (insn1, 0, 0);
26792 if (GET_CODE (insn1) == SET
26793 && GET_CODE (SET_SRC (insn1)) == MULT
26794 && GET_MODE (SET_SRC (insn1)) == SImode)
26796 sd_iterator_def sd_it1;
26797 dep_t dep1;
26798 /* Check that INSN is the only producer this IMUL depends on. */
26799 index = i;
26800 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26802 rtx pro;
26803 pro = DEP_PRO (dep1);
26804 if (!NONDEBUG_INSN_P (pro))
26805 continue;
26806 if (pro != insn)
26807 index = -1;
26809 if (index >= 0)
26810 break;
26813 if (index >= 0)
26814 break;
26816 return index;
26819 /* Try to find the best candidate for the top of the ready list if two insns
26820 have the same priority - a candidate is best if its producers were
26821 scheduled earlier. Applied for Silvermont only.
26822 Return true if the top 2 insns must be interchanged. */
26823 static bool
26824 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26826 rtx_insn *top = ready[n_ready - 1];
26827 rtx_insn *next = ready[n_ready - 2];
26828 rtx set;
26829 sd_iterator_def sd_it;
26830 dep_t dep;
26831 int clock1 = -1;
26832 int clock2 = -1;
26833 #define INSN_TICK(INSN) (HID (INSN)->tick)
26835 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26836 return false;
26838 if (!NONDEBUG_INSN_P (top))
26839 return false;
26840 if (!NONJUMP_INSN_P (top))
26841 return false;
26842 if (!NONDEBUG_INSN_P (next))
26843 return false;
26844 if (!NONJUMP_INSN_P (next))
26845 return false;
26846 set = single_set (top);
26847 if (!set)
26848 return false;
26849 set = single_set (next);
26850 if (!set)
26851 return false;
26853 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26855 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26856 return false;
26857 /* Determine the winner more precisely by comparing producer ready ticks. */
26858 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26860 rtx pro;
26861 pro = DEP_PRO (dep);
26862 if (!NONDEBUG_INSN_P (pro))
26863 continue;
26864 if (INSN_TICK (pro) > clock1)
26865 clock1 = INSN_TICK (pro);
26867 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26869 rtx pro;
26870 pro = DEP_PRO (dep);
26871 if (!NONDEBUG_INSN_P (pro))
26872 continue;
26873 if (INSN_TICK (pro) > clock2)
26874 clock2 = INSN_TICK (pro);
26877 if (clock1 == clock2)
26879 /* Determine winner - load must win. */
26880 enum attr_memory memory1, memory2;
26881 memory1 = get_attr_memory (top);
26882 memory2 = get_attr_memory (next);
26883 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26884 return true;
26886 return (bool) (clock2 < clock1);
26888 return false;
26889 #undef INSN_TICK
26892 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26893 Return issue rate. */
26894 static int
26895 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26896 int *pn_ready, int clock_var)
26898 int issue_rate = -1;
26899 int n_ready = *pn_ready;
26900 int i;
26901 rtx_insn *insn;
26902 int index = -1;
26904 /* Set up issue rate. */
26905 issue_rate = ix86_issue_rate ();
26907 /* Do reordering for BONNELL/SILVERMONT only. */
26908 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26909 return issue_rate;
26911 /* Nothing to do if ready list contains only 1 instruction. */
26912 if (n_ready <= 1)
26913 return issue_rate;
26915 /* Do reordering for the post-reload scheduler only. */
26916 if (!reload_completed)
26917 return issue_rate;
26919 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26921 if (sched_verbose > 1)
26922 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26923 INSN_UID (ready[index]));
26925 /* Put IMUL producer (ready[index]) at the top of ready list. */
26926 insn = ready[index];
26927 for (i = index; i < n_ready - 1; i++)
26928 ready[i] = ready[i + 1];
26929 ready[n_ready - 1] = insn;
26930 return issue_rate;
26933 /* Skip selective scheduling since HID is not populated in it. */
26934 if (clock_var != 0
26935 && !sel_sched_p ()
26936 && swap_top_of_ready_list (ready, n_ready))
26938 if (sched_verbose > 1)
26939 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26940 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26941 /* Swap 2 top elements of ready list. */
26942 insn = ready[n_ready - 1];
26943 ready[n_ready - 1] = ready[n_ready - 2];
26944 ready[n_ready - 2] = insn;
26946 return issue_rate;
26949 static bool
26950 ix86_class_likely_spilled_p (reg_class_t);
26952 /* Return true if the lhs of INSN is a HW function argument register, and set
26953 *IS_SPILLED to true if it is a likely-spilled HW register. */
26954 static bool
26955 insn_is_function_arg (rtx insn, bool* is_spilled)
26957 rtx dst;
26959 if (!NONDEBUG_INSN_P (insn))
26960 return false;
26961 /* Call instructions are not movable, ignore them. */
26962 if (CALL_P (insn))
26963 return false;
26964 insn = PATTERN (insn);
26965 if (GET_CODE (insn) == PARALLEL)
26966 insn = XVECEXP (insn, 0, 0);
26967 if (GET_CODE (insn) != SET)
26968 return false;
26969 dst = SET_DEST (insn);
26970 if (REG_P (dst) && HARD_REGISTER_P (dst)
26971 && ix86_function_arg_regno_p (REGNO (dst)))
26973 /* Is it a likely-spilled HW register? */
26974 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26975 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26976 *is_spilled = true;
26977 return true;
26979 return false;
26982 /* Add output dependencies for a chain of adjacent function arguments, but only
26983 if there is a move to a likely-spilled HW register. Return the first argument
26984 if at least one dependence was added, or NULL otherwise. */
26985 static rtx_insn *
26986 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26988 rtx_insn *insn;
26989 rtx_insn *last = call;
26990 rtx_insn *first_arg = NULL;
26991 bool is_spilled = false;
26993 head = PREV_INSN (head);
26996 /* Find the argument-passing instruction nearest to the call. */
26996 while (true)
26998 last = PREV_INSN (last);
26999 if (last == head)
27000 return NULL;
27001 if (!NONDEBUG_INSN_P (last))
27002 continue;
27003 if (insn_is_function_arg (last, &is_spilled))
27004 break;
27005 return NULL;
27008 first_arg = last;
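/* Walk backwards over the remaining argument-passing instructions, adding output dependencies between adjacent arguments whenever a likely-spilled HW register has been seen.  */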
27009 while (true)
27011 insn = PREV_INSN (last);
27012 if (!INSN_P (insn))
27013 break;
27014 if (insn == head)
27015 break;
27016 if (!NONDEBUG_INSN_P (insn))
27018 last = insn;
27019 continue;
27021 if (insn_is_function_arg (insn, &is_spilled))
27023 /* Add an output dependence between two function arguments if the chain
27024 of output arguments contains likely-spilled HW registers. */
27025 if (is_spilled)
27026 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27027 first_arg = last = insn;
27029 else
27030 break;
27032 if (!is_spilled)
27033 return NULL;
27034 return first_arg;
27037 /* Add output or anti dependency from insn to first_arg to restrict its code
27038 motion. */
27039 static void
27040 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
27042 rtx set;
27043 rtx tmp;
27045 /* Add anti dependencies for bounds stores. */
27046 if (INSN_P (insn)
27047 && GET_CODE (PATTERN (insn)) == PARALLEL
27048 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
27049 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
27051 add_dependence (first_arg, insn, REG_DEP_ANTI);
27052 return;
27055 set = single_set (insn);
27056 if (!set)
27057 return;
27058 tmp = SET_DEST (set);
27059 if (REG_P (tmp))
27061 /* Add output dependency to the first function argument. */
27062 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27063 return;
27065 /* Add anti dependency. */
27066 add_dependence (first_arg, insn, REG_DEP_ANTI);
27069 /* Avoid cross-block motion of a function argument by adding a dependency
27070 from the first non-jump instruction in BB. */
27071 static void
27072 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
27074 rtx_insn *insn = BB_END (bb);
27076 while (insn)
27078 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
27080 rtx set = single_set (insn);
27081 if (set)
27083 avoid_func_arg_motion (arg, insn);
27084 return;
27087 if (insn == BB_HEAD (bb))
27088 return;
27089 insn = PREV_INSN (insn);
27093 /* Hook for pre-reload schedule - avoid motion of function arguments
27094 passed in likely spilled HW registers. */
27095 static void
27096 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
27098 rtx_insn *insn;
27099 rtx_insn *first_arg = NULL;
27100 if (reload_completed)
27101 return;
27102 while (head != tail && DEBUG_INSN_P (head))
27103 head = NEXT_INSN (head);
27104 for (insn = tail; insn != head; insn = PREV_INSN (insn))
27105 if (INSN_P (insn) && CALL_P (insn))
27107 first_arg = add_parameter_dependencies (insn, head);
27108 if (first_arg)
27110 /* Add a dependee for the first argument to predecessors, but only
27111 if the region contains more than one block. */
27112 basic_block bb = BLOCK_FOR_INSN (insn);
27113 int rgn = CONTAINING_RGN (bb->index);
27114 int nr_blks = RGN_NR_BLOCKS (rgn);
27115 /* Skip trivial regions and region head blocks that can have
27116 predecessors outside of region. */
27117 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
27119 edge e;
27120 edge_iterator ei;
27122 /* Regions are SCCs with the exception of selective
27123 scheduling with pipelining of outer blocks enabled.
27124 So also check that immediate predecessors of a non-head
27125 block are in the same region. */
27126 FOR_EACH_EDGE (e, ei, bb->preds)
27128 /* Avoid creating loop-carried dependencies by using
27129 the topological ordering of the region. */
27130 if (rgn == CONTAINING_RGN (e->src->index)
27131 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27132 add_dependee_for_func_arg (first_arg, e->src);
27135 insn = first_arg;
27136 if (insn == head)
27137 break;
27140 else if (first_arg)
27141 avoid_func_arg_motion (first_arg, insn);
27144 /* Hook for pre-reload schedule - set priority of moves from likely spilled
27145 HW registers to maximum, to schedule them as soon as possible. These are
27146 moves from function argument registers at the top of the function entry
27147 and moves from function return value registers after call. */
27148 static int
27149 ix86_adjust_priority (rtx_insn *insn, int priority)
27151 rtx set;
27153 if (reload_completed)
27154 return priority;
27156 if (!NONDEBUG_INSN_P (insn))
27157 return priority;
27159 set = single_set (insn);
27160 if (set)
27162 rtx tmp = SET_SRC (set);
27163 if (REG_P (tmp)
27164 && HARD_REGISTER_P (tmp)
27165 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27166 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27167 return current_sched_info->sched_max_insns_priority;
27170 return priority;
27173 /* Model the decoder of Core 2/i7.
27174 The hooks below, used for multipass scheduling (see haifa-sched.c:max_issue),
27175 track the instruction fetch block boundaries and make sure that long
27176 (9+ byte) instructions are assigned to D0. */
27178 /* Maximum length of an insn that can be handled by
27179 a secondary decoder unit. '8' for Core 2/i7. */
27180 static int core2i7_secondary_decoder_max_insn_size;
27182 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27183 '16' for Core 2/i7. */
27184 static int core2i7_ifetch_block_size;
27186 /* Maximum number of instructions decoder can handle per cycle.
27187 '6' for Core 2/i7. */
27188 static int core2i7_ifetch_block_max_insns;
27190 typedef struct ix86_first_cycle_multipass_data_ *
27191 ix86_first_cycle_multipass_data_t;
27192 typedef const struct ix86_first_cycle_multipass_data_ *
27193 const_ix86_first_cycle_multipass_data_t;
27195 /* A variable to store target state across calls to max_issue within
27196 one cycle. */
27197 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27198 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27200 /* Initialize DATA. */
27201 static void
27202 core2i7_first_cycle_multipass_init (void *_data)
27204 ix86_first_cycle_multipass_data_t data
27205 = (ix86_first_cycle_multipass_data_t) _data;
27207 data->ifetch_block_len = 0;
27208 data->ifetch_block_n_insns = 0;
27209 data->ready_try_change = NULL;
27210 data->ready_try_change_size = 0;
27213 /* Advancing the cycle; reset ifetch block counts. */
27214 static void
27215 core2i7_dfa_post_advance_cycle (void)
27217 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27219 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27221 data->ifetch_block_len = 0;
27222 data->ifetch_block_n_insns = 0;
27225 static int min_insn_size (rtx_insn *);
27227 /* Filter out insns from ready_try that the core will not be able to issue
27228 on current cycle due to decoder. */
27229 static void
27230 core2i7_first_cycle_multipass_filter_ready_try
27231 (const_ix86_first_cycle_multipass_data_t data,
27232 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27234 while (n_ready--)
27236 rtx_insn *insn;
27237 int insn_size;
27239 if (ready_try[n_ready])
27240 continue;
27242 insn = get_ready_element (n_ready);
27243 insn_size = min_insn_size (insn);
27245 if (/* If this is too long an insn for a secondary decoder ... */
27246 (!first_cycle_insn_p
27247 && insn_size > core2i7_secondary_decoder_max_insn_size)
27248 /* ... or it would not fit into the ifetch block ... */
27249 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27250 /* ... or the decoder is full already ... */
27251 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27252 /* ... mask the insn out. */
27254 ready_try[n_ready] = 1;
27256 if (data->ready_try_change)
27257 bitmap_set_bit (data->ready_try_change, n_ready);
27262 /* Prepare for a new round of multipass lookahead scheduling. */
27263 static void
27264 core2i7_first_cycle_multipass_begin (void *_data,
27265 signed char *ready_try, int n_ready,
27266 bool first_cycle_insn_p)
27268 ix86_first_cycle_multipass_data_t data
27269 = (ix86_first_cycle_multipass_data_t) _data;
27270 const_ix86_first_cycle_multipass_data_t prev_data
27271 = ix86_first_cycle_multipass_data;
27273 /* Restore the state from the end of the previous round. */
27274 data->ifetch_block_len = prev_data->ifetch_block_len;
27275 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27277 /* Filter instructions that cannot be issued on current cycle due to
27278 decoder restrictions. */
27279 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27280 first_cycle_insn_p);
27283 /* INSN is being issued in current solution. Account for its impact on
27284 the decoder model. */
27285 static void
27286 core2i7_first_cycle_multipass_issue (void *_data,
27287 signed char *ready_try, int n_ready,
27288 rtx_insn *insn, const void *_prev_data)
27290 ix86_first_cycle_multipass_data_t data
27291 = (ix86_first_cycle_multipass_data_t) _data;
27292 const_ix86_first_cycle_multipass_data_t prev_data
27293 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27295 int insn_size = min_insn_size (insn);
27297 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27298 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27299 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27300 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27302 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
27303 if (!data->ready_try_change)
27305 data->ready_try_change = sbitmap_alloc (n_ready);
27306 data->ready_try_change_size = n_ready;
27308 else if (data->ready_try_change_size < n_ready)
27310 data->ready_try_change = sbitmap_resize (data->ready_try_change,
27311 n_ready, 0);
27312 data->ready_try_change_size = n_ready;
27314 bitmap_clear (data->ready_try_change);
27316 /* Filter out insns from ready_try that the core will not be able to issue
27317 on current cycle due to decoder. */
27318 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27319 false);
27322 /* Revert the effect on ready_try. */
27323 static void
27324 core2i7_first_cycle_multipass_backtrack (const void *_data,
27325 signed char *ready_try,
27326 int n_ready ATTRIBUTE_UNUSED)
27328 const_ix86_first_cycle_multipass_data_t data
27329 = (const_ix86_first_cycle_multipass_data_t) _data;
27330 unsigned int i = 0;
27331 sbitmap_iterator sbi;
27333 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27334 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27336 ready_try[i] = 0;
27340 /* Save the result of multipass lookahead scheduling for the next round. */
27341 static void
27342 core2i7_first_cycle_multipass_end (const void *_data)
27344 const_ix86_first_cycle_multipass_data_t data
27345 = (const_ix86_first_cycle_multipass_data_t) _data;
27346 ix86_first_cycle_multipass_data_t next_data
27347 = ix86_first_cycle_multipass_data;
27349 if (data != NULL)
27351 next_data->ifetch_block_len = data->ifetch_block_len;
27352 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27356 /* Deallocate target data. */
27357 static void
27358 core2i7_first_cycle_multipass_fini (void *_data)
27360 ix86_first_cycle_multipass_data_t data
27361 = (ix86_first_cycle_multipass_data_t) _data;
27363 if (data->ready_try_change)
27365 sbitmap_free (data->ready_try_change);
27366 data->ready_try_change = NULL;
27367 data->ready_try_change_size = 0;
27371 /* Prepare for scheduling pass. */
27372 static void
27373 ix86_sched_init_global (FILE *, int, int)
27375 /* Install scheduling hooks for current CPU. Some of these hooks are used
27376 in time-critical parts of the scheduler, so we only set them up when
27377 they are actually used. */
27378 switch (ix86_tune)
27380 case PROCESSOR_CORE2:
27381 case PROCESSOR_NEHALEM:
27382 case PROCESSOR_SANDYBRIDGE:
27383 case PROCESSOR_HASWELL:
27384 /* Do not perform multipass scheduling for pre-reload schedule
27385 to save compile time. */
27386 if (reload_completed)
27388 targetm.sched.dfa_post_advance_cycle
27389 = core2i7_dfa_post_advance_cycle;
27390 targetm.sched.first_cycle_multipass_init
27391 = core2i7_first_cycle_multipass_init;
27392 targetm.sched.first_cycle_multipass_begin
27393 = core2i7_first_cycle_multipass_begin;
27394 targetm.sched.first_cycle_multipass_issue
27395 = core2i7_first_cycle_multipass_issue;
27396 targetm.sched.first_cycle_multipass_backtrack
27397 = core2i7_first_cycle_multipass_backtrack;
27398 targetm.sched.first_cycle_multipass_end
27399 = core2i7_first_cycle_multipass_end;
27400 targetm.sched.first_cycle_multipass_fini
27401 = core2i7_first_cycle_multipass_fini;
27403 /* Set decoder parameters. */
27404 core2i7_secondary_decoder_max_insn_size = 8;
27405 core2i7_ifetch_block_size = 16;
27406 core2i7_ifetch_block_max_insns = 6;
27407 break;
27409 /* ... Fall through ... */
27410 default:
27411 targetm.sched.dfa_post_advance_cycle = NULL;
27412 targetm.sched.first_cycle_multipass_init = NULL;
27413 targetm.sched.first_cycle_multipass_begin = NULL;
27414 targetm.sched.first_cycle_multipass_issue = NULL;
27415 targetm.sched.first_cycle_multipass_backtrack = NULL;
27416 targetm.sched.first_cycle_multipass_end = NULL;
27417 targetm.sched.first_cycle_multipass_fini = NULL;
27418 break;
27423 /* Compute the alignment given to a constant that is being placed in memory.
27424 EXP is the constant and ALIGN is the alignment that the object would
27425 ordinarily have.
27426 The value of this function is used instead of that alignment to align
27427 the object. */
27430 ix86_constant_alignment (tree exp, int align)
27432 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27433 || TREE_CODE (exp) == INTEGER_CST)
27435 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27436 return 64;
27437 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27438 return 128;
27440 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27441 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27442 return BITS_PER_WORD;
27444 return align;
27447 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
27448 the data type, and ALIGN is the alignment that the object would
27449 ordinarily have. */
27451 static int
27452 iamcu_alignment (tree type, int align)
27454 enum machine_mode mode;
27456 if (align < 32 || TYPE_USER_ALIGN (type))
27457 return align;
27459 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
27460 bytes. */
27461 mode = TYPE_MODE (strip_array_types (type));
27462 switch (GET_MODE_CLASS (mode))
27464 case MODE_INT:
27465 case MODE_COMPLEX_INT:
27466 case MODE_COMPLEX_FLOAT:
27467 case MODE_FLOAT:
27468 case MODE_DECIMAL_FLOAT:
27469 return 32;
27470 default:
27471 return align;
27475 /* Compute the alignment for a static variable.
27476 TYPE is the data type, and ALIGN is the alignment that
27477 the object would ordinarily have. The value of this function is used
27478 instead of that alignment to align the object. */
27481 ix86_data_alignment (tree type, int align, bool opt)
27483 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27484 for symbols from other compilation units or symbols that don't need
27485 to bind locally. In order to preserve some ABI compatibility with
27486 those compilers, ensure we don't decrease alignment from what we
27487 used to assume. */
27489 int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27491 /* A data structure equal to or greater than the size of a cache line
27492 (64 bytes in the Pentium 4 and other recent Intel processors, including
27493 processors based on the Intel Core microarchitecture) should be aligned
27494 so that its base address is a multiple of the cache line size. */
27496 int max_align
27497 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27499 if (max_align < BITS_PER_WORD)
27500 max_align = BITS_PER_WORD;
27502 switch (ix86_align_data_type)
27504 case ix86_align_data_type_abi: opt = false; break;
27505 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27506 case ix86_align_data_type_cacheline: break;
27509 if (TARGET_IAMCU)
27510 align = iamcu_alignment (type, align);
27512 if (opt
27513 && AGGREGATE_TYPE_P (type)
27514 && TYPE_SIZE (type)
27515 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27517 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27518 && align < max_align_compat)
27519 align = max_align_compat;
27520 if (wi::geu_p (TYPE_SIZE (type), max_align)
27521 && align < max_align)
27522 align = max_align;
27525 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27526 to a 16-byte boundary. */
27527 if (TARGET_64BIT)
27529 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27530 && TYPE_SIZE (type)
27531 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27532 && wi::geu_p (TYPE_SIZE (type), 128)
27533 && align < 128)
27534 return 128;
27537 if (!opt)
27538 return align;
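/* The checks below raise the alignment of DFmode/DCmode data to 64 bits, and of modes accepted by ALIGN_MODE_128 to 128 bits, for array, complex, aggregate-first-field, and scalar types.  */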
27540 if (TREE_CODE (type) == ARRAY_TYPE)
27542 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27543 return 64;
27544 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27545 return 128;
27547 else if (TREE_CODE (type) == COMPLEX_TYPE)
27550 if (TYPE_MODE (type) == DCmode && align < 64)
27551 return 64;
27552 if ((TYPE_MODE (type) == XCmode
27553 || TYPE_MODE (type) == TCmode) && align < 128)
27554 return 128;
27556 else if ((TREE_CODE (type) == RECORD_TYPE
27557 || TREE_CODE (type) == UNION_TYPE
27558 || TREE_CODE (type) == QUAL_UNION_TYPE)
27559 && TYPE_FIELDS (type))
27561 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27562 return 64;
27563 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27564 return 128;
27566 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27567 || TREE_CODE (type) == INTEGER_TYPE)
27569 if (TYPE_MODE (type) == DFmode && align < 64)
27570 return 64;
27571 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27572 return 128;
27575 return align;
27578 /* Compute the alignment for a local variable or a stack slot. EXP is
27579 the data type or decl itself, MODE is the widest mode available and
27580 ALIGN is the alignment that the object would ordinarily have. The
27581 value of this macro is used instead of that alignment to align the
27582 object. */
27584 unsigned int
27585 ix86_local_alignment (tree exp, machine_mode mode,
27586 unsigned int align)
27588 tree type, decl;
27590 if (exp && DECL_P (exp))
27592 type = TREE_TYPE (exp);
27593 decl = exp;
27595 else
27597 type = exp;
27598 decl = NULL;
27601 /* Don't do dynamic stack realignment for long long objects with
27602 -mpreferred-stack-boundary=2. */
27603 if (!TARGET_64BIT
27604 && align == 64
27605 && ix86_preferred_stack_boundary < 64
27606 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27607 && (!type || !TYPE_USER_ALIGN (type))
27608 && (!decl || !DECL_USER_ALIGN (decl)))
27609 align = 32;
27611 /* If TYPE is NULL, we are allocating a stack slot for caller-save
27612 register in MODE. We will return the largest alignment of XF
27613 and DF. */
27614 if (!type)
27616 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27617 align = GET_MODE_ALIGNMENT (DFmode);
27618 return align;
27621 /* Don't increase alignment for Intel MCU psABI. */
27622 if (TARGET_IAMCU)
27623 return align;
27625 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
27626 to a 16-byte boundary. The exact wording is:
27628 An array uses the same alignment as its elements, except that a local or
27629 global array variable of length at least 16 bytes or
27630 a C99 variable-length array variable always has alignment of at least 16 bytes.
27632 This was added to allow use of aligned SSE instructions on arrays. This
27633 rule is meant for static storage (where the compiler cannot do the analysis
27634 by itself). We follow it for automatic variables only when convenient.
27635 We fully control everything in the compiled function, and functions from
27636 other units cannot rely on the alignment.
27638 Exclude the va_list type. It is the common case of a local array where
27639 we cannot benefit from the alignment.
27641 TODO: Probably one should optimize for size only when var is not escaping. */
27642 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27643 && TARGET_SSE)
27645 if (AGGREGATE_TYPE_P (type)
27646 && (va_list_type_node == NULL_TREE
27647 || (TYPE_MAIN_VARIANT (type)
27648 != TYPE_MAIN_VARIANT (va_list_type_node)))
27649 && TYPE_SIZE (type)
27650 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27651 && wi::geu_p (TYPE_SIZE (type), 16)
27652 && align < 128)
27653 return 128;
27655 if (TREE_CODE (type) == ARRAY_TYPE)
27657 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27658 return 64;
27659 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27660 return 128;
27662 else if (TREE_CODE (type) == COMPLEX_TYPE)
27664 if (TYPE_MODE (type) == DCmode && align < 64)
27665 return 64;
27666 if ((TYPE_MODE (type) == XCmode
27667 || TYPE_MODE (type) == TCmode) && align < 128)
27668 return 128;
27670 else if ((TREE_CODE (type) == RECORD_TYPE
27671 || TREE_CODE (type) == UNION_TYPE
27672 || TREE_CODE (type) == QUAL_UNION_TYPE)
27673 && TYPE_FIELDS (type))
27675 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27676 return 64;
27677 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27678 return 128;
27680 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27681 || TREE_CODE (type) == INTEGER_TYPE)
27684 if (TYPE_MODE (type) == DFmode && align < 64)
27685 return 64;
27686 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27687 return 128;
27689 return align;
27692 /* Compute the minimum required alignment for dynamic stack realignment
27693 purposes for a local variable, parameter or a stack slot. EXP is
27694 the data type or decl itself, MODE is its mode and ALIGN is the
27695 alignment that the object would ordinarily have. */
27697 unsigned int
27698 ix86_minimum_alignment (tree exp, machine_mode mode,
27699 unsigned int align)
27701 tree type, decl;
27703 if (exp && DECL_P (exp))
27705 type = TREE_TYPE (exp);
27706 decl = exp;
27708 else
27710 type = exp;
27711 decl = NULL;
27714 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27715 return align;
27717 /* Don't do dynamic stack realignment for long long objects with
27718 -mpreferred-stack-boundary=2. */
27719 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27720 && (!type || !TYPE_USER_ALIGN (type))
27721 && (!decl || !DECL_USER_ALIGN (decl)))
27722 return 32;
27724 return align;
27727 /* Find a location for the static chain incoming to a nested function.
27728 This is a register, unless all free registers are used by arguments. */
27730 static rtx
27731 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27733 unsigned regno;
27735 /* While this function won't be called by the middle-end when a static
27736 chain isn't needed, it's also used throughout the backend so it's
27737 easiest to keep this check centralized. */
27738 if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27739 return NULL;
27741 if (TARGET_64BIT)
27743 /* We always use R10 in 64-bit mode. */
27744 regno = R10_REG;
27746 else
27748 const_tree fntype, fndecl;
27749 unsigned int ccvt;
27751 /* By default in 32-bit mode we use ECX to pass the static chain. */
27752 regno = CX_REG;
27754 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27756 fntype = TREE_TYPE (fndecl_or_type);
27757 fndecl = fndecl_or_type;
27759 else
27761 fntype = fndecl_or_type;
27762 fndecl = NULL;
27765 ccvt = ix86_get_callcvt (fntype);
27766 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27768 /* Fastcall functions use ecx/edx for arguments, which leaves
27769 us with EAX for the static chain.
27770 Thiscall functions use ecx for arguments, which also
27771 leaves us with EAX for the static chain. */
27772 regno = AX_REG;
27774 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27776 /* Thiscall functions use ecx for arguments, which leaves
27777 us with EAX and EDX for the static chain.
27778 We use EAX for ABI compatibility. */
27779 regno = AX_REG;
27781 else if (ix86_function_regparm (fntype, fndecl) == 3)
27783 /* For regparm 3, we have no free call-clobbered registers in
27784 which to store the static chain. In order to implement this,
27785 we have the trampoline push the static chain to the stack.
27786 However, we can't push a value below the return address when
27787 we call the nested function directly, so we have to use an
27788 alternate entry point. For this we use ESI, and have the
27789 alternate entry point push ESI, so that things appear the
27790 same once we're executing the nested function. */
27791 if (incoming_p)
27793 if (fndecl == current_function_decl)
27794 ix86_static_chain_on_stack = true;
27795 return gen_frame_mem (SImode,
27796 plus_constant (Pmode,
27797 arg_pointer_rtx, -8));
27799 regno = SI_REG;
27803 return gen_rtx_REG (Pmode, regno);
27806 /* Emit RTL insns to initialize the variable parts of a trampoline.
27807 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27808 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27809 to be passed to the target function. */
27811 static void
27812 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27814 rtx mem, fnaddr;
27815 int opcode;
27816 int offset = 0;
27818 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27820 if (TARGET_64BIT)
27822 int size;
27824 /* Load the function address to r11. Try to load address using
27825 the shorter movl instead of movabs. We may want to support
27826 movq for kernel mode, but kernel does not use trampolines at
27827 the moment. FNADDR is a 32bit address and may not be in
27828 DImode when ptr_mode == SImode. Always use movl in this
27829 case. */
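/* Byte sequence 0x41 0xbb is `movl $imm32, %r11d' and 0x49 0xbb is `movabsq $imm64, %r11'; they are stored below as little-endian HImode values 0xbb41 and 0xbb49.  */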
27830 if (ptr_mode == SImode
27831 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27833 fnaddr = copy_addr_to_reg (fnaddr);
27835 mem = adjust_address (m_tramp, HImode, offset);
27836 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27838 mem = adjust_address (m_tramp, SImode, offset + 2);
27839 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27840 offset += 6;
27842 else
27844 mem = adjust_address (m_tramp, HImode, offset);
27845 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27847 mem = adjust_address (m_tramp, DImode, offset + 2);
27848 emit_move_insn (mem, fnaddr);
27849 offset += 10;
27852 /* Load static chain using movabs to r10. Use the shorter movl
27853 instead of movabs when ptr_mode == SImode. */
27854 if (ptr_mode == SImode)
27856 opcode = 0xba41;
27857 size = 6;
27859 else
27861 opcode = 0xba49;
27862 size = 10;
27865 mem = adjust_address (m_tramp, HImode, offset);
27866 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27868 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27869 emit_move_insn (mem, chain_value);
27870 offset += size;
27872 /* Jump to r11; the last (unused) byte is a nop, only there to
27873 pad the write out to a single 32-bit store. */
27874 mem = adjust_address (m_tramp, SImode, offset);
27875 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27876 offset += 4;
27878 else
27880 rtx disp, chain;
27882 /* Depending on the static chain location, either load a register
27883 with a constant, or push the constant to the stack. All of the
27884 instructions are the same size. */
27885 chain = ix86_static_chain (fndecl, true);
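/* Opcode 0xb8/0xb9 encodes `movl $imm32, %eax/%ecx'; 0x68 encodes `pushl $imm32'.  */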
27886 if (REG_P (chain))
27888 switch (REGNO (chain))
27890 case AX_REG:
27891 opcode = 0xb8; break;
27892 case CX_REG:
27893 opcode = 0xb9; break;
27894 default:
27895 gcc_unreachable ();
27898 else
27899 opcode = 0x68;
27901 mem = adjust_address (m_tramp, QImode, offset);
27902 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27904 mem = adjust_address (m_tramp, SImode, offset + 1);
27905 emit_move_insn (mem, chain_value);
27906 offset += 5;
27908 mem = adjust_address (m_tramp, QImode, offset);
27909 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27911 mem = adjust_address (m_tramp, SImode, offset + 1);
27913 /* Compute offset from the end of the jmp to the target function.
27914 In the case in which the trampoline stores the static chain on
27915 the stack, we need to skip the first insn which pushes the
27916 (call-saved) register static chain; this push is 1 byte. */
27917 offset += 5;
27918 disp = expand_binop (SImode, sub_optab, fnaddr,
27919 plus_constant (Pmode, XEXP (m_tramp, 0),
27920 offset - (MEM_P (chain) ? 1 : 0)),
27921 NULL_RTX, 1, OPTAB_DIRECT);
27922 emit_move_insn (mem, disp);
27925 gcc_assert (offset <= TRAMPOLINE_SIZE);
27927 #ifdef HAVE_ENABLE_EXECUTE_STACK
27928 #ifdef CHECK_EXECUTE_STACK_ENABLED
27929 if (CHECK_EXECUTE_STACK_ENABLED)
27930 #endif
27931 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27932 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27933 #endif
27936 /* The following file contains several enumerations and data structures
27937 built from the definitions in i386-builtin-types.def. */
27939 #include "i386-builtin-types.inc"
27941 /* Table for the ix86 builtin non-function types. */
27942 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27944 /* Retrieve an element from the above table, building some of
27945 the types lazily. */
27947 static tree
27948 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27950 unsigned int index;
27951 tree type, itype;
27953 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27955 type = ix86_builtin_type_tab[(int) tcode];
27956 if (type != NULL)
27957 return type;
27959 gcc_assert (tcode > IX86_BT_LAST_PRIM);
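/* Codes up to IX86_BT_LAST_VECT denote vector types built from an element type and a machine mode; the remaining codes denote (possibly const-qualified) pointer types.  */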
27960 if (tcode <= IX86_BT_LAST_VECT)
27962 machine_mode mode;
27964 index = tcode - IX86_BT_LAST_PRIM - 1;
27965 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27966 mode = ix86_builtin_type_vect_mode[index];
27968 type = build_vector_type_for_mode (itype, mode);
27970 else
27972 int quals;
27974 index = tcode - IX86_BT_LAST_VECT - 1;
27975 if (tcode <= IX86_BT_LAST_PTR)
27976 quals = TYPE_UNQUALIFIED;
27977 else
27978 quals = TYPE_QUAL_CONST;
27980 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27981 if (quals != TYPE_UNQUALIFIED)
27982 itype = build_qualified_type (itype, quals);
27984 type = build_pointer_type (itype);
27987 ix86_builtin_type_tab[(int) tcode] = type;
27988 return type;
27991 /* Table for the ix86 builtin function types. */
27992 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27994 /* Retrieve an element from the above table, building some of
27995 the types lazily. */
27997 static tree
27998 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
28000 tree type;
28002 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
28004 type = ix86_builtin_func_type_tab[(int) tcode];
28005 if (type != NULL)
28006 return type;
28008 if (tcode <= IX86_BT_LAST_FUNC)
28010 unsigned start = ix86_builtin_func_start[(int) tcode];
28011 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
28012 tree rtype, atype, args = void_list_node;
28013 unsigned i;
28015 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
28016 for (i = after - 1; i > start; --i)
28018 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
28019 args = tree_cons (NULL, atype, args);
28022 type = build_function_type (rtype, args);
28024 else
28026 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
28027 enum ix86_builtin_func_type icode;
28029 icode = ix86_builtin_func_alias_base[index];
28030 type = ix86_get_builtin_func_type (icode);
28033 ix86_builtin_func_type_tab[(int) tcode] = type;
28034 return type;
28038 /* Codes for all the SSE/MMX builtins. */
28039 enum ix86_builtins
28041 IX86_BUILTIN_ADDPS,
28042 IX86_BUILTIN_ADDSS,
28043 IX86_BUILTIN_DIVPS,
28044 IX86_BUILTIN_DIVSS,
28045 IX86_BUILTIN_MULPS,
28046 IX86_BUILTIN_MULSS,
28047 IX86_BUILTIN_SUBPS,
28048 IX86_BUILTIN_SUBSS,
28050 IX86_BUILTIN_CMPEQPS,
28051 IX86_BUILTIN_CMPLTPS,
28052 IX86_BUILTIN_CMPLEPS,
28053 IX86_BUILTIN_CMPGTPS,
28054 IX86_BUILTIN_CMPGEPS,
28055 IX86_BUILTIN_CMPNEQPS,
28056 IX86_BUILTIN_CMPNLTPS,
28057 IX86_BUILTIN_CMPNLEPS,
28058 IX86_BUILTIN_CMPNGTPS,
28059 IX86_BUILTIN_CMPNGEPS,
28060 IX86_BUILTIN_CMPORDPS,
28061 IX86_BUILTIN_CMPUNORDPS,
28062 IX86_BUILTIN_CMPEQSS,
28063 IX86_BUILTIN_CMPLTSS,
28064 IX86_BUILTIN_CMPLESS,
28065 IX86_BUILTIN_CMPNEQSS,
28066 IX86_BUILTIN_CMPNLTSS,
28067 IX86_BUILTIN_CMPNLESS,
28068 IX86_BUILTIN_CMPORDSS,
28069 IX86_BUILTIN_CMPUNORDSS,
28071 IX86_BUILTIN_COMIEQSS,
28072 IX86_BUILTIN_COMILTSS,
28073 IX86_BUILTIN_COMILESS,
28074 IX86_BUILTIN_COMIGTSS,
28075 IX86_BUILTIN_COMIGESS,
28076 IX86_BUILTIN_COMINEQSS,
28077 IX86_BUILTIN_UCOMIEQSS,
28078 IX86_BUILTIN_UCOMILTSS,
28079 IX86_BUILTIN_UCOMILESS,
28080 IX86_BUILTIN_UCOMIGTSS,
28081 IX86_BUILTIN_UCOMIGESS,
28082 IX86_BUILTIN_UCOMINEQSS,
28084 IX86_BUILTIN_CVTPI2PS,
28085 IX86_BUILTIN_CVTPS2PI,
28086 IX86_BUILTIN_CVTSI2SS,
28087 IX86_BUILTIN_CVTSI642SS,
28088 IX86_BUILTIN_CVTSS2SI,
28089 IX86_BUILTIN_CVTSS2SI64,
28090 IX86_BUILTIN_CVTTPS2PI,
28091 IX86_BUILTIN_CVTTSS2SI,
28092 IX86_BUILTIN_CVTTSS2SI64,
28094 IX86_BUILTIN_MAXPS,
28095 IX86_BUILTIN_MAXSS,
28096 IX86_BUILTIN_MINPS,
28097 IX86_BUILTIN_MINSS,
28099 IX86_BUILTIN_LOADUPS,
28100 IX86_BUILTIN_STOREUPS,
28101 IX86_BUILTIN_MOVSS,
28103 IX86_BUILTIN_MOVHLPS,
28104 IX86_BUILTIN_MOVLHPS,
28105 IX86_BUILTIN_LOADHPS,
28106 IX86_BUILTIN_LOADLPS,
28107 IX86_BUILTIN_STOREHPS,
28108 IX86_BUILTIN_STORELPS,
28110 IX86_BUILTIN_MASKMOVQ,
28111 IX86_BUILTIN_MOVMSKPS,
28112 IX86_BUILTIN_PMOVMSKB,
28114 IX86_BUILTIN_MOVNTPS,
28115 IX86_BUILTIN_MOVNTQ,
28117 IX86_BUILTIN_LOADDQU,
28118 IX86_BUILTIN_STOREDQU,
28120 IX86_BUILTIN_PACKSSWB,
28121 IX86_BUILTIN_PACKSSDW,
28122 IX86_BUILTIN_PACKUSWB,
28124 IX86_BUILTIN_PADDB,
28125 IX86_BUILTIN_PADDW,
28126 IX86_BUILTIN_PADDD,
28127 IX86_BUILTIN_PADDQ,
28128 IX86_BUILTIN_PADDSB,
28129 IX86_BUILTIN_PADDSW,
28130 IX86_BUILTIN_PADDUSB,
28131 IX86_BUILTIN_PADDUSW,
28132 IX86_BUILTIN_PSUBB,
28133 IX86_BUILTIN_PSUBW,
28134 IX86_BUILTIN_PSUBD,
28135 IX86_BUILTIN_PSUBQ,
28136 IX86_BUILTIN_PSUBSB,
28137 IX86_BUILTIN_PSUBSW,
28138 IX86_BUILTIN_PSUBUSB,
28139 IX86_BUILTIN_PSUBUSW,
28141 IX86_BUILTIN_PAND,
28142 IX86_BUILTIN_PANDN,
28143 IX86_BUILTIN_POR,
28144 IX86_BUILTIN_PXOR,
28146 IX86_BUILTIN_PAVGB,
28147 IX86_BUILTIN_PAVGW,
28149 IX86_BUILTIN_PCMPEQB,
28150 IX86_BUILTIN_PCMPEQW,
28151 IX86_BUILTIN_PCMPEQD,
28152 IX86_BUILTIN_PCMPGTB,
28153 IX86_BUILTIN_PCMPGTW,
28154 IX86_BUILTIN_PCMPGTD,
28156 IX86_BUILTIN_PMADDWD,
28158 IX86_BUILTIN_PMAXSW,
28159 IX86_BUILTIN_PMAXUB,
28160 IX86_BUILTIN_PMINSW,
28161 IX86_BUILTIN_PMINUB,
28163 IX86_BUILTIN_PMULHUW,
28164 IX86_BUILTIN_PMULHW,
28165 IX86_BUILTIN_PMULLW,
28167 IX86_BUILTIN_PSADBW,
28168 IX86_BUILTIN_PSHUFW,
28170 IX86_BUILTIN_PSLLW,
28171 IX86_BUILTIN_PSLLD,
28172 IX86_BUILTIN_PSLLQ,
28173 IX86_BUILTIN_PSRAW,
28174 IX86_BUILTIN_PSRAD,
28175 IX86_BUILTIN_PSRLW,
28176 IX86_BUILTIN_PSRLD,
28177 IX86_BUILTIN_PSRLQ,
28178 IX86_BUILTIN_PSLLWI,
28179 IX86_BUILTIN_PSLLDI,
28180 IX86_BUILTIN_PSLLQI,
28181 IX86_BUILTIN_PSRAWI,
28182 IX86_BUILTIN_PSRADI,
28183 IX86_BUILTIN_PSRLWI,
28184 IX86_BUILTIN_PSRLDI,
28185 IX86_BUILTIN_PSRLQI,
28187 IX86_BUILTIN_PUNPCKHBW,
28188 IX86_BUILTIN_PUNPCKHWD,
28189 IX86_BUILTIN_PUNPCKHDQ,
28190 IX86_BUILTIN_PUNPCKLBW,
28191 IX86_BUILTIN_PUNPCKLWD,
28192 IX86_BUILTIN_PUNPCKLDQ,
28194 IX86_BUILTIN_SHUFPS,
28196 IX86_BUILTIN_RCPPS,
28197 IX86_BUILTIN_RCPSS,
28198 IX86_BUILTIN_RSQRTPS,
28199 IX86_BUILTIN_RSQRTPS_NR,
28200 IX86_BUILTIN_RSQRTSS,
28201 IX86_BUILTIN_RSQRTF,
28202 IX86_BUILTIN_SQRTPS,
28203 IX86_BUILTIN_SQRTPS_NR,
28204 IX86_BUILTIN_SQRTSS,
28206 IX86_BUILTIN_UNPCKHPS,
28207 IX86_BUILTIN_UNPCKLPS,
28209 IX86_BUILTIN_ANDPS,
28210 IX86_BUILTIN_ANDNPS,
28211 IX86_BUILTIN_ORPS,
28212 IX86_BUILTIN_XORPS,
28214 IX86_BUILTIN_EMMS,
28215 IX86_BUILTIN_LDMXCSR,
28216 IX86_BUILTIN_STMXCSR,
28217 IX86_BUILTIN_SFENCE,
28219 IX86_BUILTIN_FXSAVE,
28220 IX86_BUILTIN_FXRSTOR,
28221 IX86_BUILTIN_FXSAVE64,
28222 IX86_BUILTIN_FXRSTOR64,
28224 IX86_BUILTIN_XSAVE,
28225 IX86_BUILTIN_XRSTOR,
28226 IX86_BUILTIN_XSAVE64,
28227 IX86_BUILTIN_XRSTOR64,
28229 IX86_BUILTIN_XSAVEOPT,
28230 IX86_BUILTIN_XSAVEOPT64,
28232 IX86_BUILTIN_XSAVEC,
28233 IX86_BUILTIN_XSAVEC64,
28235 IX86_BUILTIN_XSAVES,
28236 IX86_BUILTIN_XRSTORS,
28237 IX86_BUILTIN_XSAVES64,
28238 IX86_BUILTIN_XRSTORS64,
28240 /* 3DNow! Original */
28241 IX86_BUILTIN_FEMMS,
28242 IX86_BUILTIN_PAVGUSB,
28243 IX86_BUILTIN_PF2ID,
28244 IX86_BUILTIN_PFACC,
28245 IX86_BUILTIN_PFADD,
28246 IX86_BUILTIN_PFCMPEQ,
28247 IX86_BUILTIN_PFCMPGE,
28248 IX86_BUILTIN_PFCMPGT,
28249 IX86_BUILTIN_PFMAX,
28250 IX86_BUILTIN_PFMIN,
28251 IX86_BUILTIN_PFMUL,
28252 IX86_BUILTIN_PFRCP,
28253 IX86_BUILTIN_PFRCPIT1,
28254 IX86_BUILTIN_PFRCPIT2,
28255 IX86_BUILTIN_PFRSQIT1,
28256 IX86_BUILTIN_PFRSQRT,
28257 IX86_BUILTIN_PFSUB,
28258 IX86_BUILTIN_PFSUBR,
28259 IX86_BUILTIN_PI2FD,
28260 IX86_BUILTIN_PMULHRW,
28262 /* 3DNow! Athlon Extensions */
28263 IX86_BUILTIN_PF2IW,
28264 IX86_BUILTIN_PFNACC,
28265 IX86_BUILTIN_PFPNACC,
28266 IX86_BUILTIN_PI2FW,
28267 IX86_BUILTIN_PSWAPDSI,
28268 IX86_BUILTIN_PSWAPDSF,
28270 /* SSE2 */
28271 IX86_BUILTIN_ADDPD,
28272 IX86_BUILTIN_ADDSD,
28273 IX86_BUILTIN_DIVPD,
28274 IX86_BUILTIN_DIVSD,
28275 IX86_BUILTIN_MULPD,
28276 IX86_BUILTIN_MULSD,
28277 IX86_BUILTIN_SUBPD,
28278 IX86_BUILTIN_SUBSD,
28280 IX86_BUILTIN_CMPEQPD,
28281 IX86_BUILTIN_CMPLTPD,
28282 IX86_BUILTIN_CMPLEPD,
28283 IX86_BUILTIN_CMPGTPD,
28284 IX86_BUILTIN_CMPGEPD,
28285 IX86_BUILTIN_CMPNEQPD,
28286 IX86_BUILTIN_CMPNLTPD,
28287 IX86_BUILTIN_CMPNLEPD,
28288 IX86_BUILTIN_CMPNGTPD,
28289 IX86_BUILTIN_CMPNGEPD,
28290 IX86_BUILTIN_CMPORDPD,
28291 IX86_BUILTIN_CMPUNORDPD,
28292 IX86_BUILTIN_CMPEQSD,
28293 IX86_BUILTIN_CMPLTSD,
28294 IX86_BUILTIN_CMPLESD,
28295 IX86_BUILTIN_CMPNEQSD,
28296 IX86_BUILTIN_CMPNLTSD,
28297 IX86_BUILTIN_CMPNLESD,
28298 IX86_BUILTIN_CMPORDSD,
28299 IX86_BUILTIN_CMPUNORDSD,
28301 IX86_BUILTIN_COMIEQSD,
28302 IX86_BUILTIN_COMILTSD,
28303 IX86_BUILTIN_COMILESD,
28304 IX86_BUILTIN_COMIGTSD,
28305 IX86_BUILTIN_COMIGESD,
28306 IX86_BUILTIN_COMINEQSD,
28307 IX86_BUILTIN_UCOMIEQSD,
28308 IX86_BUILTIN_UCOMILTSD,
28309 IX86_BUILTIN_UCOMILESD,
28310 IX86_BUILTIN_UCOMIGTSD,
28311 IX86_BUILTIN_UCOMIGESD,
28312 IX86_BUILTIN_UCOMINEQSD,
28314 IX86_BUILTIN_MAXPD,
28315 IX86_BUILTIN_MAXSD,
28316 IX86_BUILTIN_MINPD,
28317 IX86_BUILTIN_MINSD,
28319 IX86_BUILTIN_ANDPD,
28320 IX86_BUILTIN_ANDNPD,
28321 IX86_BUILTIN_ORPD,
28322 IX86_BUILTIN_XORPD,
28324 IX86_BUILTIN_SQRTPD,
28325 IX86_BUILTIN_SQRTSD,
28327 IX86_BUILTIN_UNPCKHPD,
28328 IX86_BUILTIN_UNPCKLPD,
28330 IX86_BUILTIN_SHUFPD,
28332 IX86_BUILTIN_LOADUPD,
28333 IX86_BUILTIN_STOREUPD,
28334 IX86_BUILTIN_MOVSD,
28336 IX86_BUILTIN_LOADHPD,
28337 IX86_BUILTIN_LOADLPD,
28339 IX86_BUILTIN_CVTDQ2PD,
28340 IX86_BUILTIN_CVTDQ2PS,
28342 IX86_BUILTIN_CVTPD2DQ,
28343 IX86_BUILTIN_CVTPD2PI,
28344 IX86_BUILTIN_CVTPD2PS,
28345 IX86_BUILTIN_CVTTPD2DQ,
28346 IX86_BUILTIN_CVTTPD2PI,
28348 IX86_BUILTIN_CVTPI2PD,
28349 IX86_BUILTIN_CVTSI2SD,
28350 IX86_BUILTIN_CVTSI642SD,
28352 IX86_BUILTIN_CVTSD2SI,
28353 IX86_BUILTIN_CVTSD2SI64,
28354 IX86_BUILTIN_CVTSD2SS,
28355 IX86_BUILTIN_CVTSS2SD,
28356 IX86_BUILTIN_CVTTSD2SI,
28357 IX86_BUILTIN_CVTTSD2SI64,
28359 IX86_BUILTIN_CVTPS2DQ,
28360 IX86_BUILTIN_CVTPS2PD,
28361 IX86_BUILTIN_CVTTPS2DQ,
28363 IX86_BUILTIN_MOVNTI,
28364 IX86_BUILTIN_MOVNTI64,
28365 IX86_BUILTIN_MOVNTPD,
28366 IX86_BUILTIN_MOVNTDQ,
28368 IX86_BUILTIN_MOVQ128,
28370 /* SSE2 MMX */
28371 IX86_BUILTIN_MASKMOVDQU,
28372 IX86_BUILTIN_MOVMSKPD,
28373 IX86_BUILTIN_PMOVMSKB128,
28375 IX86_BUILTIN_PACKSSWB128,
28376 IX86_BUILTIN_PACKSSDW128,
28377 IX86_BUILTIN_PACKUSWB128,
28379 IX86_BUILTIN_PADDB128,
28380 IX86_BUILTIN_PADDW128,
28381 IX86_BUILTIN_PADDD128,
28382 IX86_BUILTIN_PADDQ128,
28383 IX86_BUILTIN_PADDSB128,
28384 IX86_BUILTIN_PADDSW128,
28385 IX86_BUILTIN_PADDUSB128,
28386 IX86_BUILTIN_PADDUSW128,
28387 IX86_BUILTIN_PSUBB128,
28388 IX86_BUILTIN_PSUBW128,
28389 IX86_BUILTIN_PSUBD128,
28390 IX86_BUILTIN_PSUBQ128,
28391 IX86_BUILTIN_PSUBSB128,
28392 IX86_BUILTIN_PSUBSW128,
28393 IX86_BUILTIN_PSUBUSB128,
28394 IX86_BUILTIN_PSUBUSW128,
28396 IX86_BUILTIN_PAND128,
28397 IX86_BUILTIN_PANDN128,
28398 IX86_BUILTIN_POR128,
28399 IX86_BUILTIN_PXOR128,
28401 IX86_BUILTIN_PAVGB128,
28402 IX86_BUILTIN_PAVGW128,
28404 IX86_BUILTIN_PCMPEQB128,
28405 IX86_BUILTIN_PCMPEQW128,
28406 IX86_BUILTIN_PCMPEQD128,
28407 IX86_BUILTIN_PCMPGTB128,
28408 IX86_BUILTIN_PCMPGTW128,
28409 IX86_BUILTIN_PCMPGTD128,
28411 IX86_BUILTIN_PMADDWD128,
28413 IX86_BUILTIN_PMAXSW128,
28414 IX86_BUILTIN_PMAXUB128,
28415 IX86_BUILTIN_PMINSW128,
28416 IX86_BUILTIN_PMINUB128,
28418 IX86_BUILTIN_PMULUDQ,
28419 IX86_BUILTIN_PMULUDQ128,
28420 IX86_BUILTIN_PMULHUW128,
28421 IX86_BUILTIN_PMULHW128,
28422 IX86_BUILTIN_PMULLW128,
28424 IX86_BUILTIN_PSADBW128,
28425 IX86_BUILTIN_PSHUFHW,
28426 IX86_BUILTIN_PSHUFLW,
28427 IX86_BUILTIN_PSHUFD,
28429 IX86_BUILTIN_PSLLDQI128,
28430 IX86_BUILTIN_PSLLWI128,
28431 IX86_BUILTIN_PSLLDI128,
28432 IX86_BUILTIN_PSLLQI128,
28433 IX86_BUILTIN_PSRAWI128,
28434 IX86_BUILTIN_PSRADI128,
28435 IX86_BUILTIN_PSRLDQI128,
28436 IX86_BUILTIN_PSRLWI128,
28437 IX86_BUILTIN_PSRLDI128,
28438 IX86_BUILTIN_PSRLQI128,
28440 IX86_BUILTIN_PSLLDQ128,
28441 IX86_BUILTIN_PSLLW128,
28442 IX86_BUILTIN_PSLLD128,
28443 IX86_BUILTIN_PSLLQ128,
28444 IX86_BUILTIN_PSRAW128,
28445 IX86_BUILTIN_PSRAD128,
28446 IX86_BUILTIN_PSRLW128,
28447 IX86_BUILTIN_PSRLD128,
28448 IX86_BUILTIN_PSRLQ128,
28450 IX86_BUILTIN_PUNPCKHBW128,
28451 IX86_BUILTIN_PUNPCKHWD128,
28452 IX86_BUILTIN_PUNPCKHDQ128,
28453 IX86_BUILTIN_PUNPCKHQDQ128,
28454 IX86_BUILTIN_PUNPCKLBW128,
28455 IX86_BUILTIN_PUNPCKLWD128,
28456 IX86_BUILTIN_PUNPCKLDQ128,
28457 IX86_BUILTIN_PUNPCKLQDQ128,
28459 IX86_BUILTIN_CLFLUSH,
28460 IX86_BUILTIN_MFENCE,
28461 IX86_BUILTIN_LFENCE,
28462 IX86_BUILTIN_PAUSE,
28464 IX86_BUILTIN_FNSTENV,
28465 IX86_BUILTIN_FLDENV,
28466 IX86_BUILTIN_FNSTSW,
28467 IX86_BUILTIN_FNCLEX,
28469 IX86_BUILTIN_BSRSI,
28470 IX86_BUILTIN_BSRDI,
28471 IX86_BUILTIN_RDPMC,
28472 IX86_BUILTIN_RDTSC,
28473 IX86_BUILTIN_RDTSCP,
28474 IX86_BUILTIN_ROLQI,
28475 IX86_BUILTIN_ROLHI,
28476 IX86_BUILTIN_RORQI,
28477 IX86_BUILTIN_RORHI,
28479 /* SSE3. */
28480 IX86_BUILTIN_ADDSUBPS,
28481 IX86_BUILTIN_HADDPS,
28482 IX86_BUILTIN_HSUBPS,
28483 IX86_BUILTIN_MOVSHDUP,
28484 IX86_BUILTIN_MOVSLDUP,
28485 IX86_BUILTIN_ADDSUBPD,
28486 IX86_BUILTIN_HADDPD,
28487 IX86_BUILTIN_HSUBPD,
28488 IX86_BUILTIN_LDDQU,
28490 IX86_BUILTIN_MONITOR,
28491 IX86_BUILTIN_MWAIT,
28493 /* SSSE3. */
28494 IX86_BUILTIN_PHADDW,
28495 IX86_BUILTIN_PHADDD,
28496 IX86_BUILTIN_PHADDSW,
28497 IX86_BUILTIN_PHSUBW,
28498 IX86_BUILTIN_PHSUBD,
28499 IX86_BUILTIN_PHSUBSW,
28500 IX86_BUILTIN_PMADDUBSW,
28501 IX86_BUILTIN_PMULHRSW,
28502 IX86_BUILTIN_PSHUFB,
28503 IX86_BUILTIN_PSIGNB,
28504 IX86_BUILTIN_PSIGNW,
28505 IX86_BUILTIN_PSIGND,
28506 IX86_BUILTIN_PALIGNR,
28507 IX86_BUILTIN_PABSB,
28508 IX86_BUILTIN_PABSW,
28509 IX86_BUILTIN_PABSD,
28511 IX86_BUILTIN_PHADDW128,
28512 IX86_BUILTIN_PHADDD128,
28513 IX86_BUILTIN_PHADDSW128,
28514 IX86_BUILTIN_PHSUBW128,
28515 IX86_BUILTIN_PHSUBD128,
28516 IX86_BUILTIN_PHSUBSW128,
28517 IX86_BUILTIN_PMADDUBSW128,
28518 IX86_BUILTIN_PMULHRSW128,
28519 IX86_BUILTIN_PSHUFB128,
28520 IX86_BUILTIN_PSIGNB128,
28521 IX86_BUILTIN_PSIGNW128,
28522 IX86_BUILTIN_PSIGND128,
28523 IX86_BUILTIN_PALIGNR128,
28524 IX86_BUILTIN_PABSB128,
28525 IX86_BUILTIN_PABSW128,
28526 IX86_BUILTIN_PABSD128,
28528 /* AMDFAM10 - SSE4A New Instructions. */
28529 IX86_BUILTIN_MOVNTSD,
28530 IX86_BUILTIN_MOVNTSS,
28531 IX86_BUILTIN_EXTRQI,
28532 IX86_BUILTIN_EXTRQ,
28533 IX86_BUILTIN_INSERTQI,
28534 IX86_BUILTIN_INSERTQ,
28536 /* SSE4.1. */
28537 IX86_BUILTIN_BLENDPD,
28538 IX86_BUILTIN_BLENDPS,
28539 IX86_BUILTIN_BLENDVPD,
28540 IX86_BUILTIN_BLENDVPS,
28541 IX86_BUILTIN_PBLENDVB128,
28542 IX86_BUILTIN_PBLENDW128,
28544 IX86_BUILTIN_DPPD,
28545 IX86_BUILTIN_DPPS,
28547 IX86_BUILTIN_INSERTPS128,
28549 IX86_BUILTIN_MOVNTDQA,
28550 IX86_BUILTIN_MPSADBW128,
28551 IX86_BUILTIN_PACKUSDW128,
28552 IX86_BUILTIN_PCMPEQQ,
28553 IX86_BUILTIN_PHMINPOSUW128,
28555 IX86_BUILTIN_PMAXSB128,
28556 IX86_BUILTIN_PMAXSD128,
28557 IX86_BUILTIN_PMAXUD128,
28558 IX86_BUILTIN_PMAXUW128,
28560 IX86_BUILTIN_PMINSB128,
28561 IX86_BUILTIN_PMINSD128,
28562 IX86_BUILTIN_PMINUD128,
28563 IX86_BUILTIN_PMINUW128,
28565 IX86_BUILTIN_PMOVSXBW128,
28566 IX86_BUILTIN_PMOVSXBD128,
28567 IX86_BUILTIN_PMOVSXBQ128,
28568 IX86_BUILTIN_PMOVSXWD128,
28569 IX86_BUILTIN_PMOVSXWQ128,
28570 IX86_BUILTIN_PMOVSXDQ128,
28572 IX86_BUILTIN_PMOVZXBW128,
28573 IX86_BUILTIN_PMOVZXBD128,
28574 IX86_BUILTIN_PMOVZXBQ128,
28575 IX86_BUILTIN_PMOVZXWD128,
28576 IX86_BUILTIN_PMOVZXWQ128,
28577 IX86_BUILTIN_PMOVZXDQ128,
28579 IX86_BUILTIN_PMULDQ128,
28580 IX86_BUILTIN_PMULLD128,
28582 IX86_BUILTIN_ROUNDSD,
28583 IX86_BUILTIN_ROUNDSS,
28585 IX86_BUILTIN_ROUNDPD,
28586 IX86_BUILTIN_ROUNDPS,
28588 IX86_BUILTIN_FLOORPD,
28589 IX86_BUILTIN_CEILPD,
28590 IX86_BUILTIN_TRUNCPD,
28591 IX86_BUILTIN_RINTPD,
28592 IX86_BUILTIN_ROUNDPD_AZ,
28594 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28595 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28596 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28598 IX86_BUILTIN_FLOORPS,
28599 IX86_BUILTIN_CEILPS,
28600 IX86_BUILTIN_TRUNCPS,
28601 IX86_BUILTIN_RINTPS,
28602 IX86_BUILTIN_ROUNDPS_AZ,
28604 IX86_BUILTIN_FLOORPS_SFIX,
28605 IX86_BUILTIN_CEILPS_SFIX,
28606 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28608 IX86_BUILTIN_PTESTZ,
28609 IX86_BUILTIN_PTESTC,
28610 IX86_BUILTIN_PTESTNZC,
28612 IX86_BUILTIN_VEC_INIT_V2SI,
28613 IX86_BUILTIN_VEC_INIT_V4HI,
28614 IX86_BUILTIN_VEC_INIT_V8QI,
28615 IX86_BUILTIN_VEC_EXT_V2DF,
28616 IX86_BUILTIN_VEC_EXT_V2DI,
28617 IX86_BUILTIN_VEC_EXT_V4SF,
28618 IX86_BUILTIN_VEC_EXT_V4SI,
28619 IX86_BUILTIN_VEC_EXT_V8HI,
28620 IX86_BUILTIN_VEC_EXT_V2SI,
28621 IX86_BUILTIN_VEC_EXT_V4HI,
28622 IX86_BUILTIN_VEC_EXT_V16QI,
28623 IX86_BUILTIN_VEC_SET_V2DI,
28624 IX86_BUILTIN_VEC_SET_V4SF,
28625 IX86_BUILTIN_VEC_SET_V4SI,
28626 IX86_BUILTIN_VEC_SET_V8HI,
28627 IX86_BUILTIN_VEC_SET_V4HI,
28628 IX86_BUILTIN_VEC_SET_V16QI,
28630 IX86_BUILTIN_VEC_PACK_SFIX,
28631 IX86_BUILTIN_VEC_PACK_SFIX256,
28633 /* SSE4.2. */
28634 IX86_BUILTIN_CRC32QI,
28635 IX86_BUILTIN_CRC32HI,
28636 IX86_BUILTIN_CRC32SI,
28637 IX86_BUILTIN_CRC32DI,
28639 IX86_BUILTIN_PCMPESTRI128,
28640 IX86_BUILTIN_PCMPESTRM128,
28641 IX86_BUILTIN_PCMPESTRA128,
28642 IX86_BUILTIN_PCMPESTRC128,
28643 IX86_BUILTIN_PCMPESTRO128,
28644 IX86_BUILTIN_PCMPESTRS128,
28645 IX86_BUILTIN_PCMPESTRZ128,
28646 IX86_BUILTIN_PCMPISTRI128,
28647 IX86_BUILTIN_PCMPISTRM128,
28648 IX86_BUILTIN_PCMPISTRA128,
28649 IX86_BUILTIN_PCMPISTRC128,
28650 IX86_BUILTIN_PCMPISTRO128,
28651 IX86_BUILTIN_PCMPISTRS128,
28652 IX86_BUILTIN_PCMPISTRZ128,
28654 IX86_BUILTIN_PCMPGTQ,
28656 /* AES instructions */
28657 IX86_BUILTIN_AESENC128,
28658 IX86_BUILTIN_AESENCLAST128,
28659 IX86_BUILTIN_AESDEC128,
28660 IX86_BUILTIN_AESDECLAST128,
28661 IX86_BUILTIN_AESIMC128,
28662 IX86_BUILTIN_AESKEYGENASSIST128,
28664 /* PCLMUL instruction */
28665 IX86_BUILTIN_PCLMULQDQ128,
28667 /* AVX */
28668 IX86_BUILTIN_ADDPD256,
28669 IX86_BUILTIN_ADDPS256,
28670 IX86_BUILTIN_ADDSUBPD256,
28671 IX86_BUILTIN_ADDSUBPS256,
28672 IX86_BUILTIN_ANDPD256,
28673 IX86_BUILTIN_ANDPS256,
28674 IX86_BUILTIN_ANDNPD256,
28675 IX86_BUILTIN_ANDNPS256,
28676 IX86_BUILTIN_BLENDPD256,
28677 IX86_BUILTIN_BLENDPS256,
28678 IX86_BUILTIN_BLENDVPD256,
28679 IX86_BUILTIN_BLENDVPS256,
28680 IX86_BUILTIN_DIVPD256,
28681 IX86_BUILTIN_DIVPS256,
28682 IX86_BUILTIN_DPPS256,
28683 IX86_BUILTIN_HADDPD256,
28684 IX86_BUILTIN_HADDPS256,
28685 IX86_BUILTIN_HSUBPD256,
28686 IX86_BUILTIN_HSUBPS256,
28687 IX86_BUILTIN_MAXPD256,
28688 IX86_BUILTIN_MAXPS256,
28689 IX86_BUILTIN_MINPD256,
28690 IX86_BUILTIN_MINPS256,
28691 IX86_BUILTIN_MULPD256,
28692 IX86_BUILTIN_MULPS256,
28693 IX86_BUILTIN_ORPD256,
28694 IX86_BUILTIN_ORPS256,
28695 IX86_BUILTIN_SHUFPD256,
28696 IX86_BUILTIN_SHUFPS256,
28697 IX86_BUILTIN_SUBPD256,
28698 IX86_BUILTIN_SUBPS256,
28699 IX86_BUILTIN_XORPD256,
28700 IX86_BUILTIN_XORPS256,
28701 IX86_BUILTIN_CMPSD,
28702 IX86_BUILTIN_CMPSS,
28703 IX86_BUILTIN_CMPPD,
28704 IX86_BUILTIN_CMPPS,
28705 IX86_BUILTIN_CMPPD256,
28706 IX86_BUILTIN_CMPPS256,
28707 IX86_BUILTIN_CVTDQ2PD256,
28708 IX86_BUILTIN_CVTDQ2PS256,
28709 IX86_BUILTIN_CVTPD2PS256,
28710 IX86_BUILTIN_CVTPS2DQ256,
28711 IX86_BUILTIN_CVTPS2PD256,
28712 IX86_BUILTIN_CVTTPD2DQ256,
28713 IX86_BUILTIN_CVTPD2DQ256,
28714 IX86_BUILTIN_CVTTPS2DQ256,
28715 IX86_BUILTIN_EXTRACTF128PD256,
28716 IX86_BUILTIN_EXTRACTF128PS256,
28717 IX86_BUILTIN_EXTRACTF128SI256,
28718 IX86_BUILTIN_VZEROALL,
28719 IX86_BUILTIN_VZEROUPPER,
28720 IX86_BUILTIN_VPERMILVARPD,
28721 IX86_BUILTIN_VPERMILVARPS,
28722 IX86_BUILTIN_VPERMILVARPD256,
28723 IX86_BUILTIN_VPERMILVARPS256,
28724 IX86_BUILTIN_VPERMILPD,
28725 IX86_BUILTIN_VPERMILPS,
28726 IX86_BUILTIN_VPERMILPD256,
28727 IX86_BUILTIN_VPERMILPS256,
28728 IX86_BUILTIN_VPERMIL2PD,
28729 IX86_BUILTIN_VPERMIL2PS,
28730 IX86_BUILTIN_VPERMIL2PD256,
28731 IX86_BUILTIN_VPERMIL2PS256,
28732 IX86_BUILTIN_VPERM2F128PD256,
28733 IX86_BUILTIN_VPERM2F128PS256,
28734 IX86_BUILTIN_VPERM2F128SI256,
28735 IX86_BUILTIN_VBROADCASTSS,
28736 IX86_BUILTIN_VBROADCASTSD256,
28737 IX86_BUILTIN_VBROADCASTSS256,
28738 IX86_BUILTIN_VBROADCASTPD256,
28739 IX86_BUILTIN_VBROADCASTPS256,
28740 IX86_BUILTIN_VINSERTF128PD256,
28741 IX86_BUILTIN_VINSERTF128PS256,
28742 IX86_BUILTIN_VINSERTF128SI256,
28743 IX86_BUILTIN_LOADUPD256,
28744 IX86_BUILTIN_LOADUPS256,
28745 IX86_BUILTIN_STOREUPD256,
28746 IX86_BUILTIN_STOREUPS256,
28747 IX86_BUILTIN_LDDQU256,
28748 IX86_BUILTIN_MOVNTDQ256,
28749 IX86_BUILTIN_MOVNTPD256,
28750 IX86_BUILTIN_MOVNTPS256,
28751 IX86_BUILTIN_LOADDQU256,
28752 IX86_BUILTIN_STOREDQU256,
28753 IX86_BUILTIN_MASKLOADPD,
28754 IX86_BUILTIN_MASKLOADPS,
28755 IX86_BUILTIN_MASKSTOREPD,
28756 IX86_BUILTIN_MASKSTOREPS,
28757 IX86_BUILTIN_MASKLOADPD256,
28758 IX86_BUILTIN_MASKLOADPS256,
28759 IX86_BUILTIN_MASKSTOREPD256,
28760 IX86_BUILTIN_MASKSTOREPS256,
28761 IX86_BUILTIN_MOVSHDUP256,
28762 IX86_BUILTIN_MOVSLDUP256,
28763 IX86_BUILTIN_MOVDDUP256,
28765 IX86_BUILTIN_SQRTPD256,
28766 IX86_BUILTIN_SQRTPS256,
28767 IX86_BUILTIN_SQRTPS_NR256,
28768 IX86_BUILTIN_RSQRTPS256,
28769 IX86_BUILTIN_RSQRTPS_NR256,
28771 IX86_BUILTIN_RCPPS256,
28773 IX86_BUILTIN_ROUNDPD256,
28774 IX86_BUILTIN_ROUNDPS256,
28776 IX86_BUILTIN_FLOORPD256,
28777 IX86_BUILTIN_CEILPD256,
28778 IX86_BUILTIN_TRUNCPD256,
28779 IX86_BUILTIN_RINTPD256,
28780 IX86_BUILTIN_ROUNDPD_AZ256,
28782 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28783 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28784 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28786 IX86_BUILTIN_FLOORPS256,
28787 IX86_BUILTIN_CEILPS256,
28788 IX86_BUILTIN_TRUNCPS256,
28789 IX86_BUILTIN_RINTPS256,
28790 IX86_BUILTIN_ROUNDPS_AZ256,
28792 IX86_BUILTIN_FLOORPS_SFIX256,
28793 IX86_BUILTIN_CEILPS_SFIX256,
28794 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28796 IX86_BUILTIN_UNPCKHPD256,
28797 IX86_BUILTIN_UNPCKLPD256,
28798 IX86_BUILTIN_UNPCKHPS256,
28799 IX86_BUILTIN_UNPCKLPS256,
28801 IX86_BUILTIN_SI256_SI,
28802 IX86_BUILTIN_PS256_PS,
28803 IX86_BUILTIN_PD256_PD,
28804 IX86_BUILTIN_SI_SI256,
28805 IX86_BUILTIN_PS_PS256,
28806 IX86_BUILTIN_PD_PD256,
28808 IX86_BUILTIN_VTESTZPD,
28809 IX86_BUILTIN_VTESTCPD,
28810 IX86_BUILTIN_VTESTNZCPD,
28811 IX86_BUILTIN_VTESTZPS,
28812 IX86_BUILTIN_VTESTCPS,
28813 IX86_BUILTIN_VTESTNZCPS,
28814 IX86_BUILTIN_VTESTZPD256,
28815 IX86_BUILTIN_VTESTCPD256,
28816 IX86_BUILTIN_VTESTNZCPD256,
28817 IX86_BUILTIN_VTESTZPS256,
28818 IX86_BUILTIN_VTESTCPS256,
28819 IX86_BUILTIN_VTESTNZCPS256,
28820 IX86_BUILTIN_PTESTZ256,
28821 IX86_BUILTIN_PTESTC256,
28822 IX86_BUILTIN_PTESTNZC256,
28824 IX86_BUILTIN_MOVMSKPD256,
28825 IX86_BUILTIN_MOVMSKPS256,
28827 /* AVX2 */
28828 IX86_BUILTIN_MPSADBW256,
28829 IX86_BUILTIN_PABSB256,
28830 IX86_BUILTIN_PABSW256,
28831 IX86_BUILTIN_PABSD256,
28832 IX86_BUILTIN_PACKSSDW256,
28833 IX86_BUILTIN_PACKSSWB256,
28834 IX86_BUILTIN_PACKUSDW256,
28835 IX86_BUILTIN_PACKUSWB256,
28836 IX86_BUILTIN_PADDB256,
28837 IX86_BUILTIN_PADDW256,
28838 IX86_BUILTIN_PADDD256,
28839 IX86_BUILTIN_PADDQ256,
28840 IX86_BUILTIN_PADDSB256,
28841 IX86_BUILTIN_PADDSW256,
28842 IX86_BUILTIN_PADDUSB256,
28843 IX86_BUILTIN_PADDUSW256,
28844 IX86_BUILTIN_PALIGNR256,
28845 IX86_BUILTIN_AND256I,
28846 IX86_BUILTIN_ANDNOT256I,
28847 IX86_BUILTIN_PAVGB256,
28848 IX86_BUILTIN_PAVGW256,
28849 IX86_BUILTIN_PBLENDVB256,
28850 IX86_BUILTIN_PBLENDVW256,
28851 IX86_BUILTIN_PCMPEQB256,
28852 IX86_BUILTIN_PCMPEQW256,
28853 IX86_BUILTIN_PCMPEQD256,
28854 IX86_BUILTIN_PCMPEQQ256,
28855 IX86_BUILTIN_PCMPGTB256,
28856 IX86_BUILTIN_PCMPGTW256,
28857 IX86_BUILTIN_PCMPGTD256,
28858 IX86_BUILTIN_PCMPGTQ256,
28859 IX86_BUILTIN_PHADDW256,
28860 IX86_BUILTIN_PHADDD256,
28861 IX86_BUILTIN_PHADDSW256,
28862 IX86_BUILTIN_PHSUBW256,
28863 IX86_BUILTIN_PHSUBD256,
28864 IX86_BUILTIN_PHSUBSW256,
28865 IX86_BUILTIN_PMADDUBSW256,
28866 IX86_BUILTIN_PMADDWD256,
28867 IX86_BUILTIN_PMAXSB256,
28868 IX86_BUILTIN_PMAXSW256,
28869 IX86_BUILTIN_PMAXSD256,
28870 IX86_BUILTIN_PMAXUB256,
28871 IX86_BUILTIN_PMAXUW256,
28872 IX86_BUILTIN_PMAXUD256,
28873 IX86_BUILTIN_PMINSB256,
28874 IX86_BUILTIN_PMINSW256,
28875 IX86_BUILTIN_PMINSD256,
28876 IX86_BUILTIN_PMINUB256,
28877 IX86_BUILTIN_PMINUW256,
28878 IX86_BUILTIN_PMINUD256,
28879 IX86_BUILTIN_PMOVMSKB256,
28880 IX86_BUILTIN_PMOVSXBW256,
28881 IX86_BUILTIN_PMOVSXBD256,
28882 IX86_BUILTIN_PMOVSXBQ256,
28883 IX86_BUILTIN_PMOVSXWD256,
28884 IX86_BUILTIN_PMOVSXWQ256,
28885 IX86_BUILTIN_PMOVSXDQ256,
28886 IX86_BUILTIN_PMOVZXBW256,
28887 IX86_BUILTIN_PMOVZXBD256,
28888 IX86_BUILTIN_PMOVZXBQ256,
28889 IX86_BUILTIN_PMOVZXWD256,
28890 IX86_BUILTIN_PMOVZXWQ256,
28891 IX86_BUILTIN_PMOVZXDQ256,
28892 IX86_BUILTIN_PMULDQ256,
28893 IX86_BUILTIN_PMULHRSW256,
28894 IX86_BUILTIN_PMULHUW256,
28895 IX86_BUILTIN_PMULHW256,
28896 IX86_BUILTIN_PMULLW256,
28897 IX86_BUILTIN_PMULLD256,
28898 IX86_BUILTIN_PMULUDQ256,
28899 IX86_BUILTIN_POR256,
28900 IX86_BUILTIN_PSADBW256,
28901 IX86_BUILTIN_PSHUFB256,
28902 IX86_BUILTIN_PSHUFD256,
28903 IX86_BUILTIN_PSHUFHW256,
28904 IX86_BUILTIN_PSHUFLW256,
28905 IX86_BUILTIN_PSIGNB256,
28906 IX86_BUILTIN_PSIGNW256,
28907 IX86_BUILTIN_PSIGND256,
28908 IX86_BUILTIN_PSLLDQI256,
28909 IX86_BUILTIN_PSLLWI256,
28910 IX86_BUILTIN_PSLLW256,
28911 IX86_BUILTIN_PSLLDI256,
28912 IX86_BUILTIN_PSLLD256,
28913 IX86_BUILTIN_PSLLQI256,
28914 IX86_BUILTIN_PSLLQ256,
28915 IX86_BUILTIN_PSRAWI256,
28916 IX86_BUILTIN_PSRAW256,
28917 IX86_BUILTIN_PSRADI256,
28918 IX86_BUILTIN_PSRAD256,
28919 IX86_BUILTIN_PSRLDQI256,
28920 IX86_BUILTIN_PSRLWI256,
28921 IX86_BUILTIN_PSRLW256,
28922 IX86_BUILTIN_PSRLDI256,
28923 IX86_BUILTIN_PSRLD256,
28924 IX86_BUILTIN_PSRLQI256,
28925 IX86_BUILTIN_PSRLQ256,
28926 IX86_BUILTIN_PSUBB256,
28927 IX86_BUILTIN_PSUBW256,
28928 IX86_BUILTIN_PSUBD256,
28929 IX86_BUILTIN_PSUBQ256,
28930 IX86_BUILTIN_PSUBSB256,
28931 IX86_BUILTIN_PSUBSW256,
28932 IX86_BUILTIN_PSUBUSB256,
28933 IX86_BUILTIN_PSUBUSW256,
28934 IX86_BUILTIN_PUNPCKHBW256,
28935 IX86_BUILTIN_PUNPCKHWD256,
28936 IX86_BUILTIN_PUNPCKHDQ256,
28937 IX86_BUILTIN_PUNPCKHQDQ256,
28938 IX86_BUILTIN_PUNPCKLBW256,
28939 IX86_BUILTIN_PUNPCKLWD256,
28940 IX86_BUILTIN_PUNPCKLDQ256,
28941 IX86_BUILTIN_PUNPCKLQDQ256,
28942 IX86_BUILTIN_PXOR256,
28943 IX86_BUILTIN_MOVNTDQA256,
28944 IX86_BUILTIN_VBROADCASTSS_PS,
28945 IX86_BUILTIN_VBROADCASTSS_PS256,
28946 IX86_BUILTIN_VBROADCASTSD_PD256,
28947 IX86_BUILTIN_VBROADCASTSI256,
28948 IX86_BUILTIN_PBLENDD256,
28949 IX86_BUILTIN_PBLENDD128,
28950 IX86_BUILTIN_PBROADCASTB256,
28951 IX86_BUILTIN_PBROADCASTW256,
28952 IX86_BUILTIN_PBROADCASTD256,
28953 IX86_BUILTIN_PBROADCASTQ256,
28954 IX86_BUILTIN_PBROADCASTB128,
28955 IX86_BUILTIN_PBROADCASTW128,
28956 IX86_BUILTIN_PBROADCASTD128,
28957 IX86_BUILTIN_PBROADCASTQ128,
28958 IX86_BUILTIN_VPERMVARSI256,
28959 IX86_BUILTIN_VPERMDF256,
28960 IX86_BUILTIN_VPERMVARSF256,
28961 IX86_BUILTIN_VPERMDI256,
28962 IX86_BUILTIN_VPERMTI256,
28963 IX86_BUILTIN_VEXTRACT128I256,
28964 IX86_BUILTIN_VINSERT128I256,
28965 IX86_BUILTIN_MASKLOADD,
28966 IX86_BUILTIN_MASKLOADQ,
28967 IX86_BUILTIN_MASKLOADD256,
28968 IX86_BUILTIN_MASKLOADQ256,
28969 IX86_BUILTIN_MASKSTORED,
28970 IX86_BUILTIN_MASKSTOREQ,
28971 IX86_BUILTIN_MASKSTORED256,
28972 IX86_BUILTIN_MASKSTOREQ256,
28973 IX86_BUILTIN_PSLLVV4DI,
28974 IX86_BUILTIN_PSLLVV2DI,
28975 IX86_BUILTIN_PSLLVV8SI,
28976 IX86_BUILTIN_PSLLVV4SI,
28977 IX86_BUILTIN_PSRAVV8SI,
28978 IX86_BUILTIN_PSRAVV4SI,
28979 IX86_BUILTIN_PSRLVV4DI,
28980 IX86_BUILTIN_PSRLVV2DI,
28981 IX86_BUILTIN_PSRLVV8SI,
28982 IX86_BUILTIN_PSRLVV4SI,
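/* For the gather (and, later, scatter) builtins the SIV/DIV infix gives
   the index element width -- SI for 32-bit, DI for 64-bit indices --
   and the trailing mode names the data vector being loaded or stored.  */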
28984 IX86_BUILTIN_GATHERSIV2DF,
28985 IX86_BUILTIN_GATHERSIV4DF,
28986 IX86_BUILTIN_GATHERDIV2DF,
28987 IX86_BUILTIN_GATHERDIV4DF,
28988 IX86_BUILTIN_GATHERSIV4SF,
28989 IX86_BUILTIN_GATHERSIV8SF,
28990 IX86_BUILTIN_GATHERDIV4SF,
28991 IX86_BUILTIN_GATHERDIV8SF,
28992 IX86_BUILTIN_GATHERSIV2DI,
28993 IX86_BUILTIN_GATHERSIV4DI,
28994 IX86_BUILTIN_GATHERDIV2DI,
28995 IX86_BUILTIN_GATHERDIV4DI,
28996 IX86_BUILTIN_GATHERSIV4SI,
28997 IX86_BUILTIN_GATHERSIV8SI,
28998 IX86_BUILTIN_GATHERDIV4SI,
28999 IX86_BUILTIN_GATHERDIV8SI,
29001 /* AVX512F */
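/* In the AVX-512 builtins that follow, _MASK marks a merge-masking
   variant, _MASKZ a zero-masking variant and _ROUND a variant taking an
   embedded rounding/SAE immediate.  The _MASK3 FMA forms merge into the
   third operand, and _GPR marks broadcasts sourced from a general
   purpose register.  */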
29002 IX86_BUILTIN_SI512_SI256,
29003 IX86_BUILTIN_PD512_PD256,
29004 IX86_BUILTIN_PS512_PS256,
29005 IX86_BUILTIN_SI512_SI,
29006 IX86_BUILTIN_PD512_PD,
29007 IX86_BUILTIN_PS512_PS,
29008 IX86_BUILTIN_ADDPD512,
29009 IX86_BUILTIN_ADDPS512,
29010 IX86_BUILTIN_ADDSD_ROUND,
29011 IX86_BUILTIN_ADDSS_ROUND,
29012 IX86_BUILTIN_ALIGND512,
29013 IX86_BUILTIN_ALIGNQ512,
29014 IX86_BUILTIN_BLENDMD512,
29015 IX86_BUILTIN_BLENDMPD512,
29016 IX86_BUILTIN_BLENDMPS512,
29017 IX86_BUILTIN_BLENDMQ512,
29018 IX86_BUILTIN_BROADCASTF32X4_512,
29019 IX86_BUILTIN_BROADCASTF64X4_512,
29020 IX86_BUILTIN_BROADCASTI32X4_512,
29021 IX86_BUILTIN_BROADCASTI64X4_512,
29022 IX86_BUILTIN_BROADCASTSD512,
29023 IX86_BUILTIN_BROADCASTSS512,
29024 IX86_BUILTIN_CMPD512,
29025 IX86_BUILTIN_CMPPD512,
29026 IX86_BUILTIN_CMPPS512,
29027 IX86_BUILTIN_CMPQ512,
29028 IX86_BUILTIN_CMPSD_MASK,
29029 IX86_BUILTIN_CMPSS_MASK,
29030 IX86_BUILTIN_COMIDF,
29031 IX86_BUILTIN_COMISF,
29032 IX86_BUILTIN_COMPRESSPD512,
29033 IX86_BUILTIN_COMPRESSPDSTORE512,
29034 IX86_BUILTIN_COMPRESSPS512,
29035 IX86_BUILTIN_COMPRESSPSSTORE512,
29036 IX86_BUILTIN_CVTDQ2PD512,
29037 IX86_BUILTIN_CVTDQ2PS512,
29038 IX86_BUILTIN_CVTPD2DQ512,
29039 IX86_BUILTIN_CVTPD2PS512,
29040 IX86_BUILTIN_CVTPD2UDQ512,
29041 IX86_BUILTIN_CVTPH2PS512,
29042 IX86_BUILTIN_CVTPS2DQ512,
29043 IX86_BUILTIN_CVTPS2PD512,
29044 IX86_BUILTIN_CVTPS2PH512,
29045 IX86_BUILTIN_CVTPS2UDQ512,
29046 IX86_BUILTIN_CVTSD2SS_ROUND,
29047 IX86_BUILTIN_CVTSI2SD64,
29048 IX86_BUILTIN_CVTSI2SS32,
29049 IX86_BUILTIN_CVTSI2SS64,
29050 IX86_BUILTIN_CVTSS2SD_ROUND,
29051 IX86_BUILTIN_CVTTPD2DQ512,
29052 IX86_BUILTIN_CVTTPD2UDQ512,
29053 IX86_BUILTIN_CVTTPS2DQ512,
29054 IX86_BUILTIN_CVTTPS2UDQ512,
29055 IX86_BUILTIN_CVTUDQ2PD512,
29056 IX86_BUILTIN_CVTUDQ2PS512,
29057 IX86_BUILTIN_CVTUSI2SD32,
29058 IX86_BUILTIN_CVTUSI2SD64,
29059 IX86_BUILTIN_CVTUSI2SS32,
29060 IX86_BUILTIN_CVTUSI2SS64,
29061 IX86_BUILTIN_DIVPD512,
29062 IX86_BUILTIN_DIVPS512,
29063 IX86_BUILTIN_DIVSD_ROUND,
29064 IX86_BUILTIN_DIVSS_ROUND,
29065 IX86_BUILTIN_EXPANDPD512,
29066 IX86_BUILTIN_EXPANDPD512Z,
29067 IX86_BUILTIN_EXPANDPDLOAD512,
29068 IX86_BUILTIN_EXPANDPDLOAD512Z,
29069 IX86_BUILTIN_EXPANDPS512,
29070 IX86_BUILTIN_EXPANDPS512Z,
29071 IX86_BUILTIN_EXPANDPSLOAD512,
29072 IX86_BUILTIN_EXPANDPSLOAD512Z,
29073 IX86_BUILTIN_EXTRACTF32X4,
29074 IX86_BUILTIN_EXTRACTF64X4,
29075 IX86_BUILTIN_EXTRACTI32X4,
29076 IX86_BUILTIN_EXTRACTI64X4,
29077 IX86_BUILTIN_FIXUPIMMPD512_MASK,
29078 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
29079 IX86_BUILTIN_FIXUPIMMPS512_MASK,
29080 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
29081 IX86_BUILTIN_FIXUPIMMSD128_MASK,
29082 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
29083 IX86_BUILTIN_FIXUPIMMSS128_MASK,
29084 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
29085 IX86_BUILTIN_GETEXPPD512,
29086 IX86_BUILTIN_GETEXPPS512,
29087 IX86_BUILTIN_GETEXPSD128,
29088 IX86_BUILTIN_GETEXPSS128,
29089 IX86_BUILTIN_GETMANTPD512,
29090 IX86_BUILTIN_GETMANTPS512,
29091 IX86_BUILTIN_GETMANTSD128,
29092 IX86_BUILTIN_GETMANTSS128,
29093 IX86_BUILTIN_INSERTF32X4,
29094 IX86_BUILTIN_INSERTF64X4,
29095 IX86_BUILTIN_INSERTI32X4,
29096 IX86_BUILTIN_INSERTI64X4,
29097 IX86_BUILTIN_LOADAPD512,
29098 IX86_BUILTIN_LOADAPS512,
29099 IX86_BUILTIN_LOADDQUDI512,
29100 IX86_BUILTIN_LOADDQUSI512,
29101 IX86_BUILTIN_LOADUPD512,
29102 IX86_BUILTIN_LOADUPS512,
29103 IX86_BUILTIN_MAXPD512,
29104 IX86_BUILTIN_MAXPS512,
29105 IX86_BUILTIN_MAXSD_ROUND,
29106 IX86_BUILTIN_MAXSS_ROUND,
29107 IX86_BUILTIN_MINPD512,
29108 IX86_BUILTIN_MINPS512,
29109 IX86_BUILTIN_MINSD_ROUND,
29110 IX86_BUILTIN_MINSS_ROUND,
29111 IX86_BUILTIN_MOVAPD512,
29112 IX86_BUILTIN_MOVAPS512,
29113 IX86_BUILTIN_MOVDDUP512,
29114 IX86_BUILTIN_MOVDQA32LOAD512,
29115 IX86_BUILTIN_MOVDQA32STORE512,
29116 IX86_BUILTIN_MOVDQA32_512,
29117 IX86_BUILTIN_MOVDQA64LOAD512,
29118 IX86_BUILTIN_MOVDQA64STORE512,
29119 IX86_BUILTIN_MOVDQA64_512,
29120 IX86_BUILTIN_MOVNTDQ512,
29121 IX86_BUILTIN_MOVNTDQA512,
29122 IX86_BUILTIN_MOVNTPD512,
29123 IX86_BUILTIN_MOVNTPS512,
29124 IX86_BUILTIN_MOVSHDUP512,
29125 IX86_BUILTIN_MOVSLDUP512,
29126 IX86_BUILTIN_MULPD512,
29127 IX86_BUILTIN_MULPS512,
29128 IX86_BUILTIN_MULSD_ROUND,
29129 IX86_BUILTIN_MULSS_ROUND,
29130 IX86_BUILTIN_PABSD512,
29131 IX86_BUILTIN_PABSQ512,
29132 IX86_BUILTIN_PADDD512,
29133 IX86_BUILTIN_PADDQ512,
29134 IX86_BUILTIN_PANDD512,
29135 IX86_BUILTIN_PANDND512,
29136 IX86_BUILTIN_PANDNQ512,
29137 IX86_BUILTIN_PANDQ512,
29138 IX86_BUILTIN_PBROADCASTD512,
29139 IX86_BUILTIN_PBROADCASTD512_GPR,
29140 IX86_BUILTIN_PBROADCASTMB512,
29141 IX86_BUILTIN_PBROADCASTMW512,
29142 IX86_BUILTIN_PBROADCASTQ512,
29143 IX86_BUILTIN_PBROADCASTQ512_GPR,
29144 IX86_BUILTIN_PCMPEQD512_MASK,
29145 IX86_BUILTIN_PCMPEQQ512_MASK,
29146 IX86_BUILTIN_PCMPGTD512_MASK,
29147 IX86_BUILTIN_PCMPGTQ512_MASK,
29148 IX86_BUILTIN_PCOMPRESSD512,
29149 IX86_BUILTIN_PCOMPRESSDSTORE512,
29150 IX86_BUILTIN_PCOMPRESSQ512,
29151 IX86_BUILTIN_PCOMPRESSQSTORE512,
29152 IX86_BUILTIN_PEXPANDD512,
29153 IX86_BUILTIN_PEXPANDD512Z,
29154 IX86_BUILTIN_PEXPANDDLOAD512,
29155 IX86_BUILTIN_PEXPANDDLOAD512Z,
29156 IX86_BUILTIN_PEXPANDQ512,
29157 IX86_BUILTIN_PEXPANDQ512Z,
29158 IX86_BUILTIN_PEXPANDQLOAD512,
29159 IX86_BUILTIN_PEXPANDQLOAD512Z,
29160 IX86_BUILTIN_PMAXSD512,
29161 IX86_BUILTIN_PMAXSQ512,
29162 IX86_BUILTIN_PMAXUD512,
29163 IX86_BUILTIN_PMAXUQ512,
29164 IX86_BUILTIN_PMINSD512,
29165 IX86_BUILTIN_PMINSQ512,
29166 IX86_BUILTIN_PMINUD512,
29167 IX86_BUILTIN_PMINUQ512,
29168 IX86_BUILTIN_PMOVDB512,
29169 IX86_BUILTIN_PMOVDB512_MEM,
29170 IX86_BUILTIN_PMOVDW512,
29171 IX86_BUILTIN_PMOVDW512_MEM,
29172 IX86_BUILTIN_PMOVQB512,
29173 IX86_BUILTIN_PMOVQB512_MEM,
29174 IX86_BUILTIN_PMOVQD512,
29175 IX86_BUILTIN_PMOVQD512_MEM,
29176 IX86_BUILTIN_PMOVQW512,
29177 IX86_BUILTIN_PMOVQW512_MEM,
29178 IX86_BUILTIN_PMOVSDB512,
29179 IX86_BUILTIN_PMOVSDB512_MEM,
29180 IX86_BUILTIN_PMOVSDW512,
29181 IX86_BUILTIN_PMOVSDW512_MEM,
29182 IX86_BUILTIN_PMOVSQB512,
29183 IX86_BUILTIN_PMOVSQB512_MEM,
29184 IX86_BUILTIN_PMOVSQD512,
29185 IX86_BUILTIN_PMOVSQD512_MEM,
29186 IX86_BUILTIN_PMOVSQW512,
29187 IX86_BUILTIN_PMOVSQW512_MEM,
29188 IX86_BUILTIN_PMOVSXBD512,
29189 IX86_BUILTIN_PMOVSXBQ512,
29190 IX86_BUILTIN_PMOVSXDQ512,
29191 IX86_BUILTIN_PMOVSXWD512,
29192 IX86_BUILTIN_PMOVSXWQ512,
29193 IX86_BUILTIN_PMOVUSDB512,
29194 IX86_BUILTIN_PMOVUSDB512_MEM,
29195 IX86_BUILTIN_PMOVUSDW512,
29196 IX86_BUILTIN_PMOVUSDW512_MEM,
29197 IX86_BUILTIN_PMOVUSQB512,
29198 IX86_BUILTIN_PMOVUSQB512_MEM,
29199 IX86_BUILTIN_PMOVUSQD512,
29200 IX86_BUILTIN_PMOVUSQD512_MEM,
29201 IX86_BUILTIN_PMOVUSQW512,
29202 IX86_BUILTIN_PMOVUSQW512_MEM,
29203 IX86_BUILTIN_PMOVZXBD512,
29204 IX86_BUILTIN_PMOVZXBQ512,
29205 IX86_BUILTIN_PMOVZXDQ512,
29206 IX86_BUILTIN_PMOVZXWD512,
29207 IX86_BUILTIN_PMOVZXWQ512,
29208 IX86_BUILTIN_PMULDQ512,
29209 IX86_BUILTIN_PMULLD512,
29210 IX86_BUILTIN_PMULUDQ512,
29211 IX86_BUILTIN_PORD512,
29212 IX86_BUILTIN_PORQ512,
29213 IX86_BUILTIN_PROLD512,
29214 IX86_BUILTIN_PROLQ512,
29215 IX86_BUILTIN_PROLVD512,
29216 IX86_BUILTIN_PROLVQ512,
29217 IX86_BUILTIN_PRORD512,
29218 IX86_BUILTIN_PRORQ512,
29219 IX86_BUILTIN_PRORVD512,
29220 IX86_BUILTIN_PRORVQ512,
29221 IX86_BUILTIN_PSHUFD512,
29222 IX86_BUILTIN_PSLLD512,
29223 IX86_BUILTIN_PSLLDI512,
29224 IX86_BUILTIN_PSLLQ512,
29225 IX86_BUILTIN_PSLLQI512,
29226 IX86_BUILTIN_PSLLVV16SI,
29227 IX86_BUILTIN_PSLLVV8DI,
29228 IX86_BUILTIN_PSRAD512,
29229 IX86_BUILTIN_PSRADI512,
29230 IX86_BUILTIN_PSRAQ512,
29231 IX86_BUILTIN_PSRAQI512,
29232 IX86_BUILTIN_PSRAVV16SI,
29233 IX86_BUILTIN_PSRAVV8DI,
29234 IX86_BUILTIN_PSRLD512,
29235 IX86_BUILTIN_PSRLDI512,
29236 IX86_BUILTIN_PSRLQ512,
29237 IX86_BUILTIN_PSRLQI512,
29238 IX86_BUILTIN_PSRLVV16SI,
29239 IX86_BUILTIN_PSRLVV8DI,
29240 IX86_BUILTIN_PSUBD512,
29241 IX86_BUILTIN_PSUBQ512,
29242 IX86_BUILTIN_PTESTMD512,
29243 IX86_BUILTIN_PTESTMQ512,
29244 IX86_BUILTIN_PTESTNMD512,
29245 IX86_BUILTIN_PTESTNMQ512,
29246 IX86_BUILTIN_PUNPCKHDQ512,
29247 IX86_BUILTIN_PUNPCKHQDQ512,
29248 IX86_BUILTIN_PUNPCKLDQ512,
29249 IX86_BUILTIN_PUNPCKLQDQ512,
29250 IX86_BUILTIN_PXORD512,
29251 IX86_BUILTIN_PXORQ512,
29252 IX86_BUILTIN_RCP14PD512,
29253 IX86_BUILTIN_RCP14PS512,
29254 IX86_BUILTIN_RCP14SD,
29255 IX86_BUILTIN_RCP14SS,
29256 IX86_BUILTIN_RNDSCALEPD,
29257 IX86_BUILTIN_RNDSCALEPS,
29258 IX86_BUILTIN_RNDSCALESD,
29259 IX86_BUILTIN_RNDSCALESS,
29260 IX86_BUILTIN_RSQRT14PD512,
29261 IX86_BUILTIN_RSQRT14PS512,
29262 IX86_BUILTIN_RSQRT14SD,
29263 IX86_BUILTIN_RSQRT14SS,
29264 IX86_BUILTIN_SCALEFPD512,
29265 IX86_BUILTIN_SCALEFPS512,
29266 IX86_BUILTIN_SCALEFSD,
29267 IX86_BUILTIN_SCALEFSS,
29268 IX86_BUILTIN_SHUFPD512,
29269 IX86_BUILTIN_SHUFPS512,
29270 IX86_BUILTIN_SHUF_F32x4,
29271 IX86_BUILTIN_SHUF_F64x2,
29272 IX86_BUILTIN_SHUF_I32x4,
29273 IX86_BUILTIN_SHUF_I64x2,
29274 IX86_BUILTIN_SQRTPD512,
29275 IX86_BUILTIN_SQRTPD512_MASK,
29276 IX86_BUILTIN_SQRTPS512_MASK,
29277 IX86_BUILTIN_SQRTPS_NR512,
29278 IX86_BUILTIN_SQRTSD_ROUND,
29279 IX86_BUILTIN_SQRTSS_ROUND,
29280 IX86_BUILTIN_STOREAPD512,
29281 IX86_BUILTIN_STOREAPS512,
29282 IX86_BUILTIN_STOREDQUDI512,
29283 IX86_BUILTIN_STOREDQUSI512,
29284 IX86_BUILTIN_STOREUPD512,
29285 IX86_BUILTIN_STOREUPS512,
29286 IX86_BUILTIN_SUBPD512,
29287 IX86_BUILTIN_SUBPS512,
29288 IX86_BUILTIN_SUBSD_ROUND,
29289 IX86_BUILTIN_SUBSS_ROUND,
29290 IX86_BUILTIN_UCMPD512,
29291 IX86_BUILTIN_UCMPQ512,
29292 IX86_BUILTIN_UNPCKHPD512,
29293 IX86_BUILTIN_UNPCKHPS512,
29294 IX86_BUILTIN_UNPCKLPD512,
29295 IX86_BUILTIN_UNPCKLPS512,
29296 IX86_BUILTIN_VCVTSD2SI32,
29297 IX86_BUILTIN_VCVTSD2SI64,
29298 IX86_BUILTIN_VCVTSD2USI32,
29299 IX86_BUILTIN_VCVTSD2USI64,
29300 IX86_BUILTIN_VCVTSS2SI32,
29301 IX86_BUILTIN_VCVTSS2SI64,
29302 IX86_BUILTIN_VCVTSS2USI32,
29303 IX86_BUILTIN_VCVTSS2USI64,
29304 IX86_BUILTIN_VCVTTSD2SI32,
29305 IX86_BUILTIN_VCVTTSD2SI64,
29306 IX86_BUILTIN_VCVTTSD2USI32,
29307 IX86_BUILTIN_VCVTTSD2USI64,
29308 IX86_BUILTIN_VCVTTSS2SI32,
29309 IX86_BUILTIN_VCVTTSS2SI64,
29310 IX86_BUILTIN_VCVTTSS2USI32,
29311 IX86_BUILTIN_VCVTTSS2USI64,
29312 IX86_BUILTIN_VFMADDPD512_MASK,
29313 IX86_BUILTIN_VFMADDPD512_MASK3,
29314 IX86_BUILTIN_VFMADDPD512_MASKZ,
29315 IX86_BUILTIN_VFMADDPS512_MASK,
29316 IX86_BUILTIN_VFMADDPS512_MASK3,
29317 IX86_BUILTIN_VFMADDPS512_MASKZ,
29318 IX86_BUILTIN_VFMADDSD3_ROUND,
29319 IX86_BUILTIN_VFMADDSS3_ROUND,
29320 IX86_BUILTIN_VFMADDSUBPD512_MASK,
29321 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29322 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29323 IX86_BUILTIN_VFMADDSUBPS512_MASK,
29324 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29325 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29326 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29327 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29328 IX86_BUILTIN_VFMSUBPD512_MASK3,
29329 IX86_BUILTIN_VFMSUBPS512_MASK3,
29330 IX86_BUILTIN_VFMSUBSD3_MASK3,
29331 IX86_BUILTIN_VFMSUBSS3_MASK3,
29332 IX86_BUILTIN_VFNMADDPD512_MASK,
29333 IX86_BUILTIN_VFNMADDPS512_MASK,
29334 IX86_BUILTIN_VFNMSUBPD512_MASK,
29335 IX86_BUILTIN_VFNMSUBPD512_MASK3,
29336 IX86_BUILTIN_VFNMSUBPS512_MASK,
29337 IX86_BUILTIN_VFNMSUBPS512_MASK3,
29338 IX86_BUILTIN_VPCLZCNTD512,
29339 IX86_BUILTIN_VPCLZCNTQ512,
29340 IX86_BUILTIN_VPCONFLICTD512,
29341 IX86_BUILTIN_VPCONFLICTQ512,
29342 IX86_BUILTIN_VPERMDF512,
29343 IX86_BUILTIN_VPERMDI512,
29344 IX86_BUILTIN_VPERMI2VARD512,
29345 IX86_BUILTIN_VPERMI2VARPD512,
29346 IX86_BUILTIN_VPERMI2VARPS512,
29347 IX86_BUILTIN_VPERMI2VARQ512,
29348 IX86_BUILTIN_VPERMILPD512,
29349 IX86_BUILTIN_VPERMILPS512,
29350 IX86_BUILTIN_VPERMILVARPD512,
29351 IX86_BUILTIN_VPERMILVARPS512,
29352 IX86_BUILTIN_VPERMT2VARD512,
29353 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29354 IX86_BUILTIN_VPERMT2VARPD512,
29355 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29356 IX86_BUILTIN_VPERMT2VARPS512,
29357 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29358 IX86_BUILTIN_VPERMT2VARQ512,
29359 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29360 IX86_BUILTIN_VPERMVARDF512,
29361 IX86_BUILTIN_VPERMVARDI512,
29362 IX86_BUILTIN_VPERMVARSF512,
29363 IX86_BUILTIN_VPERMVARSI512,
29364 IX86_BUILTIN_VTERNLOGD512_MASK,
29365 IX86_BUILTIN_VTERNLOGD512_MASKZ,
29366 IX86_BUILTIN_VTERNLOGQ512_MASK,
29367 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29369 /* Mask arithmetic operations */
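/* These operate on the 16-bit AVX-512 mask (k) registers rather than on
   vector operands.  */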
29370 IX86_BUILTIN_KAND16,
29371 IX86_BUILTIN_KANDN16,
29372 IX86_BUILTIN_KNOT16,
29373 IX86_BUILTIN_KOR16,
29374 IX86_BUILTIN_KORTESTC16,
29375 IX86_BUILTIN_KORTESTZ16,
29376 IX86_BUILTIN_KUNPCKBW,
29377 IX86_BUILTIN_KXNOR16,
29378 IX86_BUILTIN_KXOR16,
29379 IX86_BUILTIN_KMOV16,
29381 /* AVX512VL. */
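/* 128-bit and 256-bit forms of the AVX-512 operations enabled by
   AVX512VL.  The _MEM variants of the PMOV truncations store their
   narrowed result directly to memory.  */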
29382 IX86_BUILTIN_PMOVUSQD256_MEM,
29383 IX86_BUILTIN_PMOVUSQD128_MEM,
29384 IX86_BUILTIN_PMOVSQD256_MEM,
29385 IX86_BUILTIN_PMOVSQD128_MEM,
29386 IX86_BUILTIN_PMOVQD256_MEM,
29387 IX86_BUILTIN_PMOVQD128_MEM,
29388 IX86_BUILTIN_PMOVUSQW256_MEM,
29389 IX86_BUILTIN_PMOVUSQW128_MEM,
29390 IX86_BUILTIN_PMOVSQW256_MEM,
29391 IX86_BUILTIN_PMOVSQW128_MEM,
29392 IX86_BUILTIN_PMOVQW256_MEM,
29393 IX86_BUILTIN_PMOVQW128_MEM,
29394 IX86_BUILTIN_PMOVUSQB256_MEM,
29395 IX86_BUILTIN_PMOVUSQB128_MEM,
29396 IX86_BUILTIN_PMOVSQB256_MEM,
29397 IX86_BUILTIN_PMOVSQB128_MEM,
29398 IX86_BUILTIN_PMOVQB256_MEM,
29399 IX86_BUILTIN_PMOVQB128_MEM,
29400 IX86_BUILTIN_PMOVUSDW256_MEM,
29401 IX86_BUILTIN_PMOVUSDW128_MEM,
29402 IX86_BUILTIN_PMOVSDW256_MEM,
29403 IX86_BUILTIN_PMOVSDW128_MEM,
29404 IX86_BUILTIN_PMOVDW256_MEM,
29405 IX86_BUILTIN_PMOVDW128_MEM,
29406 IX86_BUILTIN_PMOVUSDB256_MEM,
29407 IX86_BUILTIN_PMOVUSDB128_MEM,
29408 IX86_BUILTIN_PMOVSDB256_MEM,
29409 IX86_BUILTIN_PMOVSDB128_MEM,
29410 IX86_BUILTIN_PMOVDB256_MEM,
29411 IX86_BUILTIN_PMOVDB128_MEM,
29412 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29413 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29414 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29415 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29416 IX86_BUILTIN_MOVDQA64STORE256_MASK,
29417 IX86_BUILTIN_MOVDQA64STORE128_MASK,
29418 IX86_BUILTIN_MOVDQA32STORE256_MASK,
29419 IX86_BUILTIN_MOVDQA32STORE128_MASK,
29420 IX86_BUILTIN_LOADAPD256_MASK,
29421 IX86_BUILTIN_LOADAPD128_MASK,
29422 IX86_BUILTIN_LOADAPS256_MASK,
29423 IX86_BUILTIN_LOADAPS128_MASK,
29424 IX86_BUILTIN_STOREAPD256_MASK,
29425 IX86_BUILTIN_STOREAPD128_MASK,
29426 IX86_BUILTIN_STOREAPS256_MASK,
29427 IX86_BUILTIN_STOREAPS128_MASK,
29428 IX86_BUILTIN_LOADUPD256_MASK,
29429 IX86_BUILTIN_LOADUPD128_MASK,
29430 IX86_BUILTIN_LOADUPS256_MASK,
29431 IX86_BUILTIN_LOADUPS128_MASK,
29432 IX86_BUILTIN_STOREUPD256_MASK,
29433 IX86_BUILTIN_STOREUPD128_MASK,
29434 IX86_BUILTIN_STOREUPS256_MASK,
29435 IX86_BUILTIN_STOREUPS128_MASK,
29436 IX86_BUILTIN_LOADDQUDI256_MASK,
29437 IX86_BUILTIN_LOADDQUDI128_MASK,
29438 IX86_BUILTIN_LOADDQUSI256_MASK,
29439 IX86_BUILTIN_LOADDQUSI128_MASK,
29440 IX86_BUILTIN_LOADDQUHI256_MASK,
29441 IX86_BUILTIN_LOADDQUHI128_MASK,
29442 IX86_BUILTIN_LOADDQUQI256_MASK,
29443 IX86_BUILTIN_LOADDQUQI128_MASK,
29444 IX86_BUILTIN_STOREDQUDI256_MASK,
29445 IX86_BUILTIN_STOREDQUDI128_MASK,
29446 IX86_BUILTIN_STOREDQUSI256_MASK,
29447 IX86_BUILTIN_STOREDQUSI128_MASK,
29448 IX86_BUILTIN_STOREDQUHI256_MASK,
29449 IX86_BUILTIN_STOREDQUHI128_MASK,
29450 IX86_BUILTIN_STOREDQUQI256_MASK,
29451 IX86_BUILTIN_STOREDQUQI128_MASK,
29452 IX86_BUILTIN_COMPRESSPDSTORE256,
29453 IX86_BUILTIN_COMPRESSPDSTORE128,
29454 IX86_BUILTIN_COMPRESSPSSTORE256,
29455 IX86_BUILTIN_COMPRESSPSSTORE128,
29456 IX86_BUILTIN_PCOMPRESSQSTORE256,
29457 IX86_BUILTIN_PCOMPRESSQSTORE128,
29458 IX86_BUILTIN_PCOMPRESSDSTORE256,
29459 IX86_BUILTIN_PCOMPRESSDSTORE128,
29460 IX86_BUILTIN_EXPANDPDLOAD256,
29461 IX86_BUILTIN_EXPANDPDLOAD128,
29462 IX86_BUILTIN_EXPANDPSLOAD256,
29463 IX86_BUILTIN_EXPANDPSLOAD128,
29464 IX86_BUILTIN_PEXPANDQLOAD256,
29465 IX86_BUILTIN_PEXPANDQLOAD128,
29466 IX86_BUILTIN_PEXPANDDLOAD256,
29467 IX86_BUILTIN_PEXPANDDLOAD128,
29468 IX86_BUILTIN_EXPANDPDLOAD256Z,
29469 IX86_BUILTIN_EXPANDPDLOAD128Z,
29470 IX86_BUILTIN_EXPANDPSLOAD256Z,
29471 IX86_BUILTIN_EXPANDPSLOAD128Z,
29472 IX86_BUILTIN_PEXPANDQLOAD256Z,
29473 IX86_BUILTIN_PEXPANDQLOAD128Z,
29474 IX86_BUILTIN_PEXPANDDLOAD256Z,
29475 IX86_BUILTIN_PEXPANDDLOAD128Z,
29476 IX86_BUILTIN_PALIGNR256_MASK,
29477 IX86_BUILTIN_PALIGNR128_MASK,
29478 IX86_BUILTIN_MOVDQA64_256_MASK,
29479 IX86_BUILTIN_MOVDQA64_128_MASK,
29480 IX86_BUILTIN_MOVDQA32_256_MASK,
29481 IX86_BUILTIN_MOVDQA32_128_MASK,
29482 IX86_BUILTIN_MOVAPD256_MASK,
29483 IX86_BUILTIN_MOVAPD128_MASK,
29484 IX86_BUILTIN_MOVAPS256_MASK,
29485 IX86_BUILTIN_MOVAPS128_MASK,
29486 IX86_BUILTIN_MOVDQUHI256_MASK,
29487 IX86_BUILTIN_MOVDQUHI128_MASK,
29488 IX86_BUILTIN_MOVDQUQI256_MASK,
29489 IX86_BUILTIN_MOVDQUQI128_MASK,
29490 IX86_BUILTIN_MINPS128_MASK,
29491 IX86_BUILTIN_MAXPS128_MASK,
29492 IX86_BUILTIN_MINPD128_MASK,
29493 IX86_BUILTIN_MAXPD128_MASK,
29494 IX86_BUILTIN_MAXPD256_MASK,
29495 IX86_BUILTIN_MAXPS256_MASK,
29496 IX86_BUILTIN_MINPD256_MASK,
29497 IX86_BUILTIN_MINPS256_MASK,
29498 IX86_BUILTIN_MULPS128_MASK,
29499 IX86_BUILTIN_DIVPS128_MASK,
29500 IX86_BUILTIN_MULPD128_MASK,
29501 IX86_BUILTIN_DIVPD128_MASK,
29502 IX86_BUILTIN_DIVPD256_MASK,
29503 IX86_BUILTIN_DIVPS256_MASK,
29504 IX86_BUILTIN_MULPD256_MASK,
29505 IX86_BUILTIN_MULPS256_MASK,
29506 IX86_BUILTIN_ADDPD128_MASK,
29507 IX86_BUILTIN_ADDPD256_MASK,
29508 IX86_BUILTIN_ADDPS128_MASK,
29509 IX86_BUILTIN_ADDPS256_MASK,
29510 IX86_BUILTIN_SUBPD128_MASK,
29511 IX86_BUILTIN_SUBPD256_MASK,
29512 IX86_BUILTIN_SUBPS128_MASK,
29513 IX86_BUILTIN_SUBPS256_MASK,
29514 IX86_BUILTIN_XORPD256_MASK,
29515 IX86_BUILTIN_XORPD128_MASK,
29516 IX86_BUILTIN_XORPS256_MASK,
29517 IX86_BUILTIN_XORPS128_MASK,
29518 IX86_BUILTIN_ORPD256_MASK,
29519 IX86_BUILTIN_ORPD128_MASK,
29520 IX86_BUILTIN_ORPS256_MASK,
29521 IX86_BUILTIN_ORPS128_MASK,
29522 IX86_BUILTIN_BROADCASTF32x2_256,
29523 IX86_BUILTIN_BROADCASTI32x2_256,
29524 IX86_BUILTIN_BROADCASTI32x2_128,
29525 IX86_BUILTIN_BROADCASTF64X2_256,
29526 IX86_BUILTIN_BROADCASTI64X2_256,
29527 IX86_BUILTIN_BROADCASTF32X4_256,
29528 IX86_BUILTIN_BROADCASTI32X4_256,
29529 IX86_BUILTIN_EXTRACTF32X4_256,
29530 IX86_BUILTIN_EXTRACTI32X4_256,
29531 IX86_BUILTIN_DBPSADBW256,
29532 IX86_BUILTIN_DBPSADBW128,
29533 IX86_BUILTIN_CVTTPD2QQ256,
29534 IX86_BUILTIN_CVTTPD2QQ128,
29535 IX86_BUILTIN_CVTTPD2UQQ256,
29536 IX86_BUILTIN_CVTTPD2UQQ128,
29537 IX86_BUILTIN_CVTPD2QQ256,
29538 IX86_BUILTIN_CVTPD2QQ128,
29539 IX86_BUILTIN_CVTPD2UQQ256,
29540 IX86_BUILTIN_CVTPD2UQQ128,
29541 IX86_BUILTIN_CVTPD2UDQ256_MASK,
29542 IX86_BUILTIN_CVTPD2UDQ128_MASK,
29543 IX86_BUILTIN_CVTTPS2QQ256,
29544 IX86_BUILTIN_CVTTPS2QQ128,
29545 IX86_BUILTIN_CVTTPS2UQQ256,
29546 IX86_BUILTIN_CVTTPS2UQQ128,
29547 IX86_BUILTIN_CVTTPS2DQ256_MASK,
29548 IX86_BUILTIN_CVTTPS2DQ128_MASK,
29549 IX86_BUILTIN_CVTTPS2UDQ256,
29550 IX86_BUILTIN_CVTTPS2UDQ128,
29551 IX86_BUILTIN_CVTTPD2DQ256_MASK,
29552 IX86_BUILTIN_CVTTPD2DQ128_MASK,
29553 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29554 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29555 IX86_BUILTIN_CVTPD2DQ256_MASK,
29556 IX86_BUILTIN_CVTPD2DQ128_MASK,
29557 IX86_BUILTIN_CVTDQ2PD256_MASK,
29558 IX86_BUILTIN_CVTDQ2PD128_MASK,
29559 IX86_BUILTIN_CVTUDQ2PD256_MASK,
29560 IX86_BUILTIN_CVTUDQ2PD128_MASK,
29561 IX86_BUILTIN_CVTDQ2PS256_MASK,
29562 IX86_BUILTIN_CVTDQ2PS128_MASK,
29563 IX86_BUILTIN_CVTUDQ2PS256_MASK,
29564 IX86_BUILTIN_CVTUDQ2PS128_MASK,
29565 IX86_BUILTIN_CVTPS2PD256_MASK,
29566 IX86_BUILTIN_CVTPS2PD128_MASK,
29567 IX86_BUILTIN_PBROADCASTB256_MASK,
29568 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29569 IX86_BUILTIN_PBROADCASTB128_MASK,
29570 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29571 IX86_BUILTIN_PBROADCASTW256_MASK,
29572 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29573 IX86_BUILTIN_PBROADCASTW128_MASK,
29574 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29575 IX86_BUILTIN_PBROADCASTD256_MASK,
29576 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29577 IX86_BUILTIN_PBROADCASTD128_MASK,
29578 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29579 IX86_BUILTIN_PBROADCASTQ256_MASK,
29580 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29581 IX86_BUILTIN_PBROADCASTQ128_MASK,
29582 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29583 IX86_BUILTIN_BROADCASTSS256,
29584 IX86_BUILTIN_BROADCASTSS128,
29585 IX86_BUILTIN_BROADCASTSD256,
29586 IX86_BUILTIN_EXTRACTF64X2_256,
29587 IX86_BUILTIN_EXTRACTI64X2_256,
29588 IX86_BUILTIN_INSERTF32X4_256,
29589 IX86_BUILTIN_INSERTI32X4_256,
29590 IX86_BUILTIN_PMOVSXBW256_MASK,
29591 IX86_BUILTIN_PMOVSXBW128_MASK,
29592 IX86_BUILTIN_PMOVSXBD256_MASK,
29593 IX86_BUILTIN_PMOVSXBD128_MASK,
29594 IX86_BUILTIN_PMOVSXBQ256_MASK,
29595 IX86_BUILTIN_PMOVSXBQ128_MASK,
29596 IX86_BUILTIN_PMOVSXWD256_MASK,
29597 IX86_BUILTIN_PMOVSXWD128_MASK,
29598 IX86_BUILTIN_PMOVSXWQ256_MASK,
29599 IX86_BUILTIN_PMOVSXWQ128_MASK,
29600 IX86_BUILTIN_PMOVSXDQ256_MASK,
29601 IX86_BUILTIN_PMOVSXDQ128_MASK,
29602 IX86_BUILTIN_PMOVZXBW256_MASK,
29603 IX86_BUILTIN_PMOVZXBW128_MASK,
29604 IX86_BUILTIN_PMOVZXBD256_MASK,
29605 IX86_BUILTIN_PMOVZXBD128_MASK,
29606 IX86_BUILTIN_PMOVZXBQ256_MASK,
29607 IX86_BUILTIN_PMOVZXBQ128_MASK,
29608 IX86_BUILTIN_PMOVZXWD256_MASK,
29609 IX86_BUILTIN_PMOVZXWD128_MASK,
29610 IX86_BUILTIN_PMOVZXWQ256_MASK,
29611 IX86_BUILTIN_PMOVZXWQ128_MASK,
29612 IX86_BUILTIN_PMOVZXDQ256_MASK,
29613 IX86_BUILTIN_PMOVZXDQ128_MASK,
29614 IX86_BUILTIN_REDUCEPD256_MASK,
29615 IX86_BUILTIN_REDUCEPD128_MASK,
29616 IX86_BUILTIN_REDUCEPS256_MASK,
29617 IX86_BUILTIN_REDUCEPS128_MASK,
29618 IX86_BUILTIN_REDUCESD_MASK,
29619 IX86_BUILTIN_REDUCESS_MASK,
29620 IX86_BUILTIN_VPERMVARHI256_MASK,
29621 IX86_BUILTIN_VPERMVARHI128_MASK,
29622 IX86_BUILTIN_VPERMT2VARHI256,
29623 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29624 IX86_BUILTIN_VPERMT2VARHI128,
29625 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29626 IX86_BUILTIN_VPERMI2VARHI256,
29627 IX86_BUILTIN_VPERMI2VARHI128,
29628 IX86_BUILTIN_RCP14PD256,
29629 IX86_BUILTIN_RCP14PD128,
29630 IX86_BUILTIN_RCP14PS256,
29631 IX86_BUILTIN_RCP14PS128,
29632 IX86_BUILTIN_RSQRT14PD256_MASK,
29633 IX86_BUILTIN_RSQRT14PD128_MASK,
29634 IX86_BUILTIN_RSQRT14PS256_MASK,
29635 IX86_BUILTIN_RSQRT14PS128_MASK,
29636 IX86_BUILTIN_SQRTPD256_MASK,
29637 IX86_BUILTIN_SQRTPD128_MASK,
29638 IX86_BUILTIN_SQRTPS256_MASK,
29639 IX86_BUILTIN_SQRTPS128_MASK,
29640 IX86_BUILTIN_PADDB128_MASK,
29641 IX86_BUILTIN_PADDW128_MASK,
29642 IX86_BUILTIN_PADDD128_MASK,
29643 IX86_BUILTIN_PADDQ128_MASK,
29644 IX86_BUILTIN_PSUBB128_MASK,
29645 IX86_BUILTIN_PSUBW128_MASK,
29646 IX86_BUILTIN_PSUBD128_MASK,
29647 IX86_BUILTIN_PSUBQ128_MASK,
29648 IX86_BUILTIN_PADDSB128_MASK,
29649 IX86_BUILTIN_PADDSW128_MASK,
29650 IX86_BUILTIN_PSUBSB128_MASK,
29651 IX86_BUILTIN_PSUBSW128_MASK,
29652 IX86_BUILTIN_PADDUSB128_MASK,
29653 IX86_BUILTIN_PADDUSW128_MASK,
29654 IX86_BUILTIN_PSUBUSB128_MASK,
29655 IX86_BUILTIN_PSUBUSW128_MASK,
29656 IX86_BUILTIN_PADDB256_MASK,
29657 IX86_BUILTIN_PADDW256_MASK,
29658 IX86_BUILTIN_PADDD256_MASK,
29659 IX86_BUILTIN_PADDQ256_MASK,
29660 IX86_BUILTIN_PADDSB256_MASK,
29661 IX86_BUILTIN_PADDSW256_MASK,
29662 IX86_BUILTIN_PADDUSB256_MASK,
29663 IX86_BUILTIN_PADDUSW256_MASK,
29664 IX86_BUILTIN_PSUBB256_MASK,
29665 IX86_BUILTIN_PSUBW256_MASK,
29666 IX86_BUILTIN_PSUBD256_MASK,
29667 IX86_BUILTIN_PSUBQ256_MASK,
29668 IX86_BUILTIN_PSUBSB256_MASK,
29669 IX86_BUILTIN_PSUBSW256_MASK,
29670 IX86_BUILTIN_PSUBUSB256_MASK,
29671 IX86_BUILTIN_PSUBUSW256_MASK,
29672 IX86_BUILTIN_SHUF_F64x2_256,
29673 IX86_BUILTIN_SHUF_I64x2_256,
29674 IX86_BUILTIN_SHUF_I32x4_256,
29675 IX86_BUILTIN_SHUF_F32x4_256,
29676 IX86_BUILTIN_PMOVWB128,
29677 IX86_BUILTIN_PMOVWB256,
29678 IX86_BUILTIN_PMOVSWB128,
29679 IX86_BUILTIN_PMOVSWB256,
29680 IX86_BUILTIN_PMOVUSWB128,
29681 IX86_BUILTIN_PMOVUSWB256,
29682 IX86_BUILTIN_PMOVDB128,
29683 IX86_BUILTIN_PMOVDB256,
29684 IX86_BUILTIN_PMOVSDB128,
29685 IX86_BUILTIN_PMOVSDB256,
29686 IX86_BUILTIN_PMOVUSDB128,
29687 IX86_BUILTIN_PMOVUSDB256,
29688 IX86_BUILTIN_PMOVDW128,
29689 IX86_BUILTIN_PMOVDW256,
29690 IX86_BUILTIN_PMOVSDW128,
29691 IX86_BUILTIN_PMOVSDW256,
29692 IX86_BUILTIN_PMOVUSDW128,
29693 IX86_BUILTIN_PMOVUSDW256,
29694 IX86_BUILTIN_PMOVQB128,
29695 IX86_BUILTIN_PMOVQB256,
29696 IX86_BUILTIN_PMOVSQB128,
29697 IX86_BUILTIN_PMOVSQB256,
29698 IX86_BUILTIN_PMOVUSQB128,
29699 IX86_BUILTIN_PMOVUSQB256,
29700 IX86_BUILTIN_PMOVQW128,
29701 IX86_BUILTIN_PMOVQW256,
29702 IX86_BUILTIN_PMOVSQW128,
29703 IX86_BUILTIN_PMOVSQW256,
29704 IX86_BUILTIN_PMOVUSQW128,
29705 IX86_BUILTIN_PMOVUSQW256,
29706 IX86_BUILTIN_PMOVQD128,
29707 IX86_BUILTIN_PMOVQD256,
29708 IX86_BUILTIN_PMOVSQD128,
29709 IX86_BUILTIN_PMOVSQD256,
29710 IX86_BUILTIN_PMOVUSQD128,
29711 IX86_BUILTIN_PMOVUSQD256,
29712 IX86_BUILTIN_RANGEPD256,
29713 IX86_BUILTIN_RANGEPD128,
29714 IX86_BUILTIN_RANGEPS256,
29715 IX86_BUILTIN_RANGEPS128,
29716 IX86_BUILTIN_GETEXPPS256,
29717 IX86_BUILTIN_GETEXPPD256,
29718 IX86_BUILTIN_GETEXPPS128,
29719 IX86_BUILTIN_GETEXPPD128,
29720 IX86_BUILTIN_FIXUPIMMPD256_MASK,
29721 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29722 IX86_BUILTIN_FIXUPIMMPS256_MASK,
29723 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29724 IX86_BUILTIN_FIXUPIMMPD128_MASK,
29725 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29726 IX86_BUILTIN_FIXUPIMMPS128_MASK,
29727 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29728 IX86_BUILTIN_PABSQ256,
29729 IX86_BUILTIN_PABSQ128,
29730 IX86_BUILTIN_PABSD256_MASK,
29731 IX86_BUILTIN_PABSD128_MASK,
29732 IX86_BUILTIN_PMULHRSW256_MASK,
29733 IX86_BUILTIN_PMULHRSW128_MASK,
29734 IX86_BUILTIN_PMULHUW128_MASK,
29735 IX86_BUILTIN_PMULHUW256_MASK,
29736 IX86_BUILTIN_PMULHW256_MASK,
29737 IX86_BUILTIN_PMULHW128_MASK,
29738 IX86_BUILTIN_PMULLW256_MASK,
29739 IX86_BUILTIN_PMULLW128_MASK,
29740 IX86_BUILTIN_PMULLQ256,
29741 IX86_BUILTIN_PMULLQ128,
29742 IX86_BUILTIN_ANDPD256_MASK,
29743 IX86_BUILTIN_ANDPD128_MASK,
29744 IX86_BUILTIN_ANDPS256_MASK,
29745 IX86_BUILTIN_ANDPS128_MASK,
29746 IX86_BUILTIN_ANDNPD256_MASK,
29747 IX86_BUILTIN_ANDNPD128_MASK,
29748 IX86_BUILTIN_ANDNPS256_MASK,
29749 IX86_BUILTIN_ANDNPS128_MASK,
29750 IX86_BUILTIN_PSLLWI128_MASK,
29751 IX86_BUILTIN_PSLLDI128_MASK,
29752 IX86_BUILTIN_PSLLQI128_MASK,
29753 IX86_BUILTIN_PSLLW128_MASK,
29754 IX86_BUILTIN_PSLLD128_MASK,
29755 IX86_BUILTIN_PSLLQ128_MASK,
29756 IX86_BUILTIN_PSLLWI256_MASK,
29757 IX86_BUILTIN_PSLLW256_MASK,
29758 IX86_BUILTIN_PSLLDI256_MASK,
29759 IX86_BUILTIN_PSLLD256_MASK,
29760 IX86_BUILTIN_PSLLQI256_MASK,
29761 IX86_BUILTIN_PSLLQ256_MASK,
29762 IX86_BUILTIN_PSRADI128_MASK,
29763 IX86_BUILTIN_PSRAD128_MASK,
29764 IX86_BUILTIN_PSRADI256_MASK,
29765 IX86_BUILTIN_PSRAD256_MASK,
29766 IX86_BUILTIN_PSRAQI128_MASK,
29767 IX86_BUILTIN_PSRAQ128_MASK,
29768 IX86_BUILTIN_PSRAQI256_MASK,
29769 IX86_BUILTIN_PSRAQ256_MASK,
29770 IX86_BUILTIN_PANDD256,
29771 IX86_BUILTIN_PANDD128,
29772 IX86_BUILTIN_PSRLDI128_MASK,
29773 IX86_BUILTIN_PSRLD128_MASK,
29774 IX86_BUILTIN_PSRLDI256_MASK,
29775 IX86_BUILTIN_PSRLD256_MASK,
29776 IX86_BUILTIN_PSRLQI128_MASK,
29777 IX86_BUILTIN_PSRLQ128_MASK,
29778 IX86_BUILTIN_PSRLQI256_MASK,
29779 IX86_BUILTIN_PSRLQ256_MASK,
29780 IX86_BUILTIN_PANDQ256,
29781 IX86_BUILTIN_PANDQ128,
29782 IX86_BUILTIN_PANDND256,
29783 IX86_BUILTIN_PANDND128,
29784 IX86_BUILTIN_PANDNQ256,
29785 IX86_BUILTIN_PANDNQ128,
29786 IX86_BUILTIN_PORD256,
29787 IX86_BUILTIN_PORD128,
29788 IX86_BUILTIN_PORQ256,
29789 IX86_BUILTIN_PORQ128,
29790 IX86_BUILTIN_PXORD256,
29791 IX86_BUILTIN_PXORD128,
29792 IX86_BUILTIN_PXORQ256,
29793 IX86_BUILTIN_PXORQ128,
29794 IX86_BUILTIN_PACKSSWB256_MASK,
29795 IX86_BUILTIN_PACKSSWB128_MASK,
29796 IX86_BUILTIN_PACKUSWB256_MASK,
29797 IX86_BUILTIN_PACKUSWB128_MASK,
29798 IX86_BUILTIN_RNDSCALEPS256,
29799 IX86_BUILTIN_RNDSCALEPD256,
29800 IX86_BUILTIN_RNDSCALEPS128,
29801 IX86_BUILTIN_RNDSCALEPD128,
29802 IX86_BUILTIN_VTERNLOGQ256_MASK,
29803 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29804 IX86_BUILTIN_VTERNLOGD256_MASK,
29805 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29806 IX86_BUILTIN_VTERNLOGQ128_MASK,
29807 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29808 IX86_BUILTIN_VTERNLOGD128_MASK,
29809 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29810 IX86_BUILTIN_SCALEFPD256,
29811 IX86_BUILTIN_SCALEFPS256,
29812 IX86_BUILTIN_SCALEFPD128,
29813 IX86_BUILTIN_SCALEFPS128,
29814 IX86_BUILTIN_VFMADDPD256_MASK,
29815 IX86_BUILTIN_VFMADDPD256_MASK3,
29816 IX86_BUILTIN_VFMADDPD256_MASKZ,
29817 IX86_BUILTIN_VFMADDPD128_MASK,
29818 IX86_BUILTIN_VFMADDPD128_MASK3,
29819 IX86_BUILTIN_VFMADDPD128_MASKZ,
29820 IX86_BUILTIN_VFMADDPS256_MASK,
29821 IX86_BUILTIN_VFMADDPS256_MASK3,
29822 IX86_BUILTIN_VFMADDPS256_MASKZ,
29823 IX86_BUILTIN_VFMADDPS128_MASK,
29824 IX86_BUILTIN_VFMADDPS128_MASK3,
29825 IX86_BUILTIN_VFMADDPS128_MASKZ,
29826 IX86_BUILTIN_VFMSUBPD256_MASK3,
29827 IX86_BUILTIN_VFMSUBPD128_MASK3,
29828 IX86_BUILTIN_VFMSUBPS256_MASK3,
29829 IX86_BUILTIN_VFMSUBPS128_MASK3,
29830 IX86_BUILTIN_VFNMADDPD256_MASK,
29831 IX86_BUILTIN_VFNMADDPD128_MASK,
29832 IX86_BUILTIN_VFNMADDPS256_MASK,
29833 IX86_BUILTIN_VFNMADDPS128_MASK,
29834 IX86_BUILTIN_VFNMSUBPD256_MASK,
29835 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29836 IX86_BUILTIN_VFNMSUBPD128_MASK,
29837 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29838 IX86_BUILTIN_VFNMSUBPS256_MASK,
29839 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29840 IX86_BUILTIN_VFNMSUBPS128_MASK,
29841 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29842 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29843 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29844 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29845 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29846 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29847 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29848 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29849 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29850 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29851 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29852 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29853 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29854 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29855 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29856 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29857 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29858 IX86_BUILTIN_INSERTF64X2_256,
29859 IX86_BUILTIN_INSERTI64X2_256,
29860 IX86_BUILTIN_PSRAVV16HI,
29861 IX86_BUILTIN_PSRAVV8HI,
29862 IX86_BUILTIN_PMADDUBSW256_MASK,
29863 IX86_BUILTIN_PMADDUBSW128_MASK,
29864 IX86_BUILTIN_PMADDWD256_MASK,
29865 IX86_BUILTIN_PMADDWD128_MASK,
29866 IX86_BUILTIN_PSRLVV16HI,
29867 IX86_BUILTIN_PSRLVV8HI,
29868 IX86_BUILTIN_CVTPS2DQ256_MASK,
29869 IX86_BUILTIN_CVTPS2DQ128_MASK,
29870 IX86_BUILTIN_CVTPS2UDQ256,
29871 IX86_BUILTIN_CVTPS2UDQ128,
29872 IX86_BUILTIN_CVTPS2QQ256,
29873 IX86_BUILTIN_CVTPS2QQ128,
29874 IX86_BUILTIN_CVTPS2UQQ256,
29875 IX86_BUILTIN_CVTPS2UQQ128,
29876 IX86_BUILTIN_GETMANTPS256,
29877 IX86_BUILTIN_GETMANTPS128,
29878 IX86_BUILTIN_GETMANTPD256,
29879 IX86_BUILTIN_GETMANTPD128,
29880 IX86_BUILTIN_MOVDDUP256_MASK,
29881 IX86_BUILTIN_MOVDDUP128_MASK,
29882 IX86_BUILTIN_MOVSHDUP256_MASK,
29883 IX86_BUILTIN_MOVSHDUP128_MASK,
29884 IX86_BUILTIN_MOVSLDUP256_MASK,
29885 IX86_BUILTIN_MOVSLDUP128_MASK,
29886 IX86_BUILTIN_CVTQQ2PS256,
29887 IX86_BUILTIN_CVTQQ2PS128,
29888 IX86_BUILTIN_CVTUQQ2PS256,
29889 IX86_BUILTIN_CVTUQQ2PS128,
29890 IX86_BUILTIN_CVTQQ2PD256,
29891 IX86_BUILTIN_CVTQQ2PD128,
29892 IX86_BUILTIN_CVTUQQ2PD256,
29893 IX86_BUILTIN_CVTUQQ2PD128,
29894 IX86_BUILTIN_VPERMT2VARQ256,
29895 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29896 IX86_BUILTIN_VPERMT2VARD256,
29897 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29898 IX86_BUILTIN_VPERMI2VARQ256,
29899 IX86_BUILTIN_VPERMI2VARD256,
29900 IX86_BUILTIN_VPERMT2VARPD256,
29901 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29902 IX86_BUILTIN_VPERMT2VARPS256,
29903 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29904 IX86_BUILTIN_VPERMI2VARPD256,
29905 IX86_BUILTIN_VPERMI2VARPS256,
29906 IX86_BUILTIN_VPERMT2VARQ128,
29907 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29908 IX86_BUILTIN_VPERMT2VARD128,
29909 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29910 IX86_BUILTIN_VPERMI2VARQ128,
29911 IX86_BUILTIN_VPERMI2VARD128,
29912 IX86_BUILTIN_VPERMT2VARPD128,
29913 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29914 IX86_BUILTIN_VPERMT2VARPS128,
29915 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29916 IX86_BUILTIN_VPERMI2VARPD128,
29917 IX86_BUILTIN_VPERMI2VARPS128,
29918 IX86_BUILTIN_PSHUFB256_MASK,
29919 IX86_BUILTIN_PSHUFB128_MASK,
29920 IX86_BUILTIN_PSHUFHW256_MASK,
29921 IX86_BUILTIN_PSHUFHW128_MASK,
29922 IX86_BUILTIN_PSHUFLW256_MASK,
29923 IX86_BUILTIN_PSHUFLW128_MASK,
29924 IX86_BUILTIN_PSHUFD256_MASK,
29925 IX86_BUILTIN_PSHUFD128_MASK,
29926 IX86_BUILTIN_SHUFPD256_MASK,
29927 IX86_BUILTIN_SHUFPD128_MASK,
29928 IX86_BUILTIN_SHUFPS256_MASK,
29929 IX86_BUILTIN_SHUFPS128_MASK,
29930 IX86_BUILTIN_PROLVQ256,
29931 IX86_BUILTIN_PROLVQ128,
29932 IX86_BUILTIN_PROLQ256,
29933 IX86_BUILTIN_PROLQ128,
29934 IX86_BUILTIN_PRORVQ256,
29935 IX86_BUILTIN_PRORVQ128,
29936 IX86_BUILTIN_PRORQ256,
29937 IX86_BUILTIN_PRORQ128,
29938 IX86_BUILTIN_PSRAVQ128,
29939 IX86_BUILTIN_PSRAVQ256,
29940 IX86_BUILTIN_PSLLVV4DI_MASK,
29941 IX86_BUILTIN_PSLLVV2DI_MASK,
29942 IX86_BUILTIN_PSLLVV8SI_MASK,
29943 IX86_BUILTIN_PSLLVV4SI_MASK,
29944 IX86_BUILTIN_PSRAVV8SI_MASK,
29945 IX86_BUILTIN_PSRAVV4SI_MASK,
29946 IX86_BUILTIN_PSRLVV4DI_MASK,
29947 IX86_BUILTIN_PSRLVV2DI_MASK,
29948 IX86_BUILTIN_PSRLVV8SI_MASK,
29949 IX86_BUILTIN_PSRLVV4SI_MASK,
29950 IX86_BUILTIN_PSRAWI256_MASK,
29951 IX86_BUILTIN_PSRAW256_MASK,
29952 IX86_BUILTIN_PSRAWI128_MASK,
29953 IX86_BUILTIN_PSRAW128_MASK,
29954 IX86_BUILTIN_PSRLWI256_MASK,
29955 IX86_BUILTIN_PSRLW256_MASK,
29956 IX86_BUILTIN_PSRLWI128_MASK,
29957 IX86_BUILTIN_PSRLW128_MASK,
29958 IX86_BUILTIN_PRORVD256,
29959 IX86_BUILTIN_PROLVD256,
29960 IX86_BUILTIN_PRORD256,
29961 IX86_BUILTIN_PROLD256,
29962 IX86_BUILTIN_PRORVD128,
29963 IX86_BUILTIN_PROLVD128,
29964 IX86_BUILTIN_PRORD128,
29965 IX86_BUILTIN_PROLD128,
29966 IX86_BUILTIN_FPCLASSPD256,
29967 IX86_BUILTIN_FPCLASSPD128,
29968 IX86_BUILTIN_FPCLASSSD,
29969 IX86_BUILTIN_FPCLASSPS256,
29970 IX86_BUILTIN_FPCLASSPS128,
29971 IX86_BUILTIN_FPCLASSSS,
29972 IX86_BUILTIN_CVTB2MASK128,
29973 IX86_BUILTIN_CVTB2MASK256,
29974 IX86_BUILTIN_CVTW2MASK128,
29975 IX86_BUILTIN_CVTW2MASK256,
29976 IX86_BUILTIN_CVTD2MASK128,
29977 IX86_BUILTIN_CVTD2MASK256,
29978 IX86_BUILTIN_CVTQ2MASK128,
29979 IX86_BUILTIN_CVTQ2MASK256,
29980 IX86_BUILTIN_CVTMASK2B128,
29981 IX86_BUILTIN_CVTMASK2B256,
29982 IX86_BUILTIN_CVTMASK2W128,
29983 IX86_BUILTIN_CVTMASK2W256,
29984 IX86_BUILTIN_CVTMASK2D128,
29985 IX86_BUILTIN_CVTMASK2D256,
29986 IX86_BUILTIN_CVTMASK2Q128,
29987 IX86_BUILTIN_CVTMASK2Q256,
29988 IX86_BUILTIN_PCMPEQB128_MASK,
29989 IX86_BUILTIN_PCMPEQB256_MASK,
29990 IX86_BUILTIN_PCMPEQW128_MASK,
29991 IX86_BUILTIN_PCMPEQW256_MASK,
29992 IX86_BUILTIN_PCMPEQD128_MASK,
29993 IX86_BUILTIN_PCMPEQD256_MASK,
29994 IX86_BUILTIN_PCMPEQQ128_MASK,
29995 IX86_BUILTIN_PCMPEQQ256_MASK,
29996 IX86_BUILTIN_PCMPGTB128_MASK,
29997 IX86_BUILTIN_PCMPGTB256_MASK,
29998 IX86_BUILTIN_PCMPGTW128_MASK,
29999 IX86_BUILTIN_PCMPGTW256_MASK,
30000 IX86_BUILTIN_PCMPGTD128_MASK,
30001 IX86_BUILTIN_PCMPGTD256_MASK,
30002 IX86_BUILTIN_PCMPGTQ128_MASK,
30003 IX86_BUILTIN_PCMPGTQ256_MASK,
30004 IX86_BUILTIN_PTESTMB128,
30005 IX86_BUILTIN_PTESTMB256,
30006 IX86_BUILTIN_PTESTMW128,
30007 IX86_BUILTIN_PTESTMW256,
30008 IX86_BUILTIN_PTESTMD128,
30009 IX86_BUILTIN_PTESTMD256,
30010 IX86_BUILTIN_PTESTMQ128,
30011 IX86_BUILTIN_PTESTMQ256,
30012 IX86_BUILTIN_PTESTNMB128,
30013 IX86_BUILTIN_PTESTNMB256,
30014 IX86_BUILTIN_PTESTNMW128,
30015 IX86_BUILTIN_PTESTNMW256,
30016 IX86_BUILTIN_PTESTNMD128,
30017 IX86_BUILTIN_PTESTNMD256,
30018 IX86_BUILTIN_PTESTNMQ128,
30019 IX86_BUILTIN_PTESTNMQ256,
30020 IX86_BUILTIN_PBROADCASTMB128,
30021 IX86_BUILTIN_PBROADCASTMB256,
30022 IX86_BUILTIN_PBROADCASTMW128,
30023 IX86_BUILTIN_PBROADCASTMW256,
30024 IX86_BUILTIN_COMPRESSPD256,
30025 IX86_BUILTIN_COMPRESSPD128,
30026 IX86_BUILTIN_COMPRESSPS256,
30027 IX86_BUILTIN_COMPRESSPS128,
30028 IX86_BUILTIN_PCOMPRESSQ256,
30029 IX86_BUILTIN_PCOMPRESSQ128,
30030 IX86_BUILTIN_PCOMPRESSD256,
30031 IX86_BUILTIN_PCOMPRESSD128,
30032 IX86_BUILTIN_EXPANDPD256,
30033 IX86_BUILTIN_EXPANDPD128,
30034 IX86_BUILTIN_EXPANDPS256,
30035 IX86_BUILTIN_EXPANDPS128,
30036 IX86_BUILTIN_PEXPANDQ256,
30037 IX86_BUILTIN_PEXPANDQ128,
30038 IX86_BUILTIN_PEXPANDD256,
30039 IX86_BUILTIN_PEXPANDD128,
30040 IX86_BUILTIN_EXPANDPD256Z,
30041 IX86_BUILTIN_EXPANDPD128Z,
30042 IX86_BUILTIN_EXPANDPS256Z,
30043 IX86_BUILTIN_EXPANDPS128Z,
30044 IX86_BUILTIN_PEXPANDQ256Z,
30045 IX86_BUILTIN_PEXPANDQ128Z,
30046 IX86_BUILTIN_PEXPANDD256Z,
30047 IX86_BUILTIN_PEXPANDD128Z,
30048 IX86_BUILTIN_PMAXSD256_MASK,
30049 IX86_BUILTIN_PMINSD256_MASK,
30050 IX86_BUILTIN_PMAXUD256_MASK,
30051 IX86_BUILTIN_PMINUD256_MASK,
30052 IX86_BUILTIN_PMAXSD128_MASK,
30053 IX86_BUILTIN_PMINSD128_MASK,
30054 IX86_BUILTIN_PMAXUD128_MASK,
30055 IX86_BUILTIN_PMINUD128_MASK,
30056 IX86_BUILTIN_PMAXSQ256_MASK,
30057 IX86_BUILTIN_PMINSQ256_MASK,
30058 IX86_BUILTIN_PMAXUQ256_MASK,
30059 IX86_BUILTIN_PMINUQ256_MASK,
30060 IX86_BUILTIN_PMAXSQ128_MASK,
30061 IX86_BUILTIN_PMINSQ128_MASK,
30062 IX86_BUILTIN_PMAXUQ128_MASK,
30063 IX86_BUILTIN_PMINUQ128_MASK,
30064 IX86_BUILTIN_PMINSB256_MASK,
30065 IX86_BUILTIN_PMINUB256_MASK,
30066 IX86_BUILTIN_PMAXSB256_MASK,
30067 IX86_BUILTIN_PMAXUB256_MASK,
30068 IX86_BUILTIN_PMINSB128_MASK,
30069 IX86_BUILTIN_PMINUB128_MASK,
30070 IX86_BUILTIN_PMAXSB128_MASK,
30071 IX86_BUILTIN_PMAXUB128_MASK,
30072 IX86_BUILTIN_PMINSW256_MASK,
30073 IX86_BUILTIN_PMINUW256_MASK,
30074 IX86_BUILTIN_PMAXSW256_MASK,
30075 IX86_BUILTIN_PMAXUW256_MASK,
30076 IX86_BUILTIN_PMINSW128_MASK,
30077 IX86_BUILTIN_PMINUW128_MASK,
30078 IX86_BUILTIN_PMAXSW128_MASK,
30079 IX86_BUILTIN_PMAXUW128_MASK,
30080 IX86_BUILTIN_VPCONFLICTQ256,
30081 IX86_BUILTIN_VPCONFLICTD256,
30082 IX86_BUILTIN_VPCLZCNTQ256,
30083 IX86_BUILTIN_VPCLZCNTD256,
30084 IX86_BUILTIN_UNPCKHPD256_MASK,
30085 IX86_BUILTIN_UNPCKHPD128_MASK,
30086 IX86_BUILTIN_UNPCKHPS256_MASK,
30087 IX86_BUILTIN_UNPCKHPS128_MASK,
30088 IX86_BUILTIN_UNPCKLPD256_MASK,
30089 IX86_BUILTIN_UNPCKLPD128_MASK,
30090 IX86_BUILTIN_UNPCKLPS256_MASK,
30091 IX86_BUILTIN_VPCONFLICTQ128,
30092 IX86_BUILTIN_VPCONFLICTD128,
30093 IX86_BUILTIN_VPCLZCNTQ128,
30094 IX86_BUILTIN_VPCLZCNTD128,
30095 IX86_BUILTIN_UNPCKLPS128_MASK,
30096 IX86_BUILTIN_ALIGND256,
30097 IX86_BUILTIN_ALIGNQ256,
30098 IX86_BUILTIN_ALIGND128,
30099 IX86_BUILTIN_ALIGNQ128,
30100 IX86_BUILTIN_CVTPS2PH256_MASK,
30101 IX86_BUILTIN_CVTPS2PH_MASK,
30102 IX86_BUILTIN_CVTPH2PS_MASK,
30103 IX86_BUILTIN_CVTPH2PS256_MASK,
30104 IX86_BUILTIN_PUNPCKHDQ128_MASK,
30105 IX86_BUILTIN_PUNPCKHDQ256_MASK,
30106 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
30107 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
30108 IX86_BUILTIN_PUNPCKLDQ128_MASK,
30109 IX86_BUILTIN_PUNPCKLDQ256_MASK,
30110 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
30111 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
30112 IX86_BUILTIN_PUNPCKHBW128_MASK,
30113 IX86_BUILTIN_PUNPCKHBW256_MASK,
30114 IX86_BUILTIN_PUNPCKHWD128_MASK,
30115 IX86_BUILTIN_PUNPCKHWD256_MASK,
30116 IX86_BUILTIN_PUNPCKLBW128_MASK,
30117 IX86_BUILTIN_PUNPCKLBW256_MASK,
30118 IX86_BUILTIN_PUNPCKLWD128_MASK,
30119 IX86_BUILTIN_PUNPCKLWD256_MASK,
30120 IX86_BUILTIN_PSLLVV16HI,
30121 IX86_BUILTIN_PSLLVV8HI,
30122 IX86_BUILTIN_PACKSSDW256_MASK,
30123 IX86_BUILTIN_PACKSSDW128_MASK,
30124 IX86_BUILTIN_PACKUSDW256_MASK,
30125 IX86_BUILTIN_PACKUSDW128_MASK,
30126 IX86_BUILTIN_PAVGB256_MASK,
30127 IX86_BUILTIN_PAVGW256_MASK,
30128 IX86_BUILTIN_PAVGB128_MASK,
30129 IX86_BUILTIN_PAVGW128_MASK,
30130 IX86_BUILTIN_VPERMVARSF256_MASK,
30131 IX86_BUILTIN_VPERMVARDF256_MASK,
30132 IX86_BUILTIN_VPERMDF256_MASK,
30133 IX86_BUILTIN_PABSB256_MASK,
30134 IX86_BUILTIN_PABSB128_MASK,
30135 IX86_BUILTIN_PABSW256_MASK,
30136 IX86_BUILTIN_PABSW128_MASK,
30137 IX86_BUILTIN_VPERMILVARPD_MASK,
30138 IX86_BUILTIN_VPERMILVARPS_MASK,
30139 IX86_BUILTIN_VPERMILVARPD256_MASK,
30140 IX86_BUILTIN_VPERMILVARPS256_MASK,
30141 IX86_BUILTIN_VPERMILPD_MASK,
30142 IX86_BUILTIN_VPERMILPS_MASK,
30143 IX86_BUILTIN_VPERMILPD256_MASK,
30144 IX86_BUILTIN_VPERMILPS256_MASK,
30145 IX86_BUILTIN_BLENDMQ256,
30146 IX86_BUILTIN_BLENDMD256,
30147 IX86_BUILTIN_BLENDMPD256,
30148 IX86_BUILTIN_BLENDMPS256,
30149 IX86_BUILTIN_BLENDMQ128,
30150 IX86_BUILTIN_BLENDMD128,
30151 IX86_BUILTIN_BLENDMPD128,
30152 IX86_BUILTIN_BLENDMPS128,
30153 IX86_BUILTIN_BLENDMW256,
30154 IX86_BUILTIN_BLENDMB256,
30155 IX86_BUILTIN_BLENDMW128,
30156 IX86_BUILTIN_BLENDMB128,
30157 IX86_BUILTIN_PMULLD256_MASK,
30158 IX86_BUILTIN_PMULLD128_MASK,
30159 IX86_BUILTIN_PMULUDQ256_MASK,
30160 IX86_BUILTIN_PMULDQ256_MASK,
30161 IX86_BUILTIN_PMULDQ128_MASK,
30162 IX86_BUILTIN_PMULUDQ128_MASK,
30163 IX86_BUILTIN_CVTPD2PS256_MASK,
30164 IX86_BUILTIN_CVTPD2PS_MASK,
30165 IX86_BUILTIN_VPERMVARSI256_MASK,
30166 IX86_BUILTIN_VPERMVARDI256_MASK,
30167 IX86_BUILTIN_VPERMDI256_MASK,
30168 IX86_BUILTIN_CMPQ256,
30169 IX86_BUILTIN_CMPD256,
30170 IX86_BUILTIN_UCMPQ256,
30171 IX86_BUILTIN_UCMPD256,
30172 IX86_BUILTIN_CMPB256,
30173 IX86_BUILTIN_CMPW256,
30174 IX86_BUILTIN_UCMPB256,
30175 IX86_BUILTIN_UCMPW256,
30176 IX86_BUILTIN_CMPPD256_MASK,
30177 IX86_BUILTIN_CMPPS256_MASK,
30178 IX86_BUILTIN_CMPQ128,
30179 IX86_BUILTIN_CMPD128,
30180 IX86_BUILTIN_UCMPQ128,
30181 IX86_BUILTIN_UCMPD128,
30182 IX86_BUILTIN_CMPB128,
30183 IX86_BUILTIN_CMPW128,
30184 IX86_BUILTIN_UCMPB128,
30185 IX86_BUILTIN_UCMPW128,
30186 IX86_BUILTIN_CMPPD128_MASK,
30187 IX86_BUILTIN_CMPPS128_MASK,
30189 IX86_BUILTIN_GATHER3SIV8SF,
30190 IX86_BUILTIN_GATHER3SIV4SF,
30191 IX86_BUILTIN_GATHER3SIV4DF,
30192 IX86_BUILTIN_GATHER3SIV2DF,
30193 IX86_BUILTIN_GATHER3DIV8SF,
30194 IX86_BUILTIN_GATHER3DIV4SF,
30195 IX86_BUILTIN_GATHER3DIV4DF,
30196 IX86_BUILTIN_GATHER3DIV2DF,
30197 IX86_BUILTIN_GATHER3SIV8SI,
30198 IX86_BUILTIN_GATHER3SIV4SI,
30199 IX86_BUILTIN_GATHER3SIV4DI,
30200 IX86_BUILTIN_GATHER3SIV2DI,
30201 IX86_BUILTIN_GATHER3DIV8SI,
30202 IX86_BUILTIN_GATHER3DIV4SI,
30203 IX86_BUILTIN_GATHER3DIV4DI,
30204 IX86_BUILTIN_GATHER3DIV2DI,
30205 IX86_BUILTIN_SCATTERSIV8SF,
30206 IX86_BUILTIN_SCATTERSIV4SF,
30207 IX86_BUILTIN_SCATTERSIV4DF,
30208 IX86_BUILTIN_SCATTERSIV2DF,
30209 IX86_BUILTIN_SCATTERDIV8SF,
30210 IX86_BUILTIN_SCATTERDIV4SF,
30211 IX86_BUILTIN_SCATTERDIV4DF,
30212 IX86_BUILTIN_SCATTERDIV2DF,
30213 IX86_BUILTIN_SCATTERSIV8SI,
30214 IX86_BUILTIN_SCATTERSIV4SI,
30215 IX86_BUILTIN_SCATTERSIV4DI,
30216 IX86_BUILTIN_SCATTERSIV2DI,
30217 IX86_BUILTIN_SCATTERDIV8SI,
30218 IX86_BUILTIN_SCATTERDIV4SI,
30219 IX86_BUILTIN_SCATTERDIV4DI,
30220 IX86_BUILTIN_SCATTERDIV2DI,
30222 /* AVX512DQ. */
30223 IX86_BUILTIN_RANGESD128,
30224 IX86_BUILTIN_RANGESS128,
30225 IX86_BUILTIN_KUNPCKWD,
30226 IX86_BUILTIN_KUNPCKDQ,
30227 IX86_BUILTIN_BROADCASTF32x2_512,
30228 IX86_BUILTIN_BROADCASTI32x2_512,
30229 IX86_BUILTIN_BROADCASTF64X2_512,
30230 IX86_BUILTIN_BROADCASTI64X2_512,
30231 IX86_BUILTIN_BROADCASTF32X8_512,
30232 IX86_BUILTIN_BROADCASTI32X8_512,
30233 IX86_BUILTIN_EXTRACTF64X2_512,
30234 IX86_BUILTIN_EXTRACTF32X8,
30235 IX86_BUILTIN_EXTRACTI64X2_512,
30236 IX86_BUILTIN_EXTRACTI32X8,
30237 IX86_BUILTIN_REDUCEPD512_MASK,
30238 IX86_BUILTIN_REDUCEPS512_MASK,
30239 IX86_BUILTIN_PMULLQ512,
30240 IX86_BUILTIN_XORPD512,
30241 IX86_BUILTIN_XORPS512,
30242 IX86_BUILTIN_ORPD512,
30243 IX86_BUILTIN_ORPS512,
30244 IX86_BUILTIN_ANDPD512,
30245 IX86_BUILTIN_ANDPS512,
30246 IX86_BUILTIN_ANDNPD512,
30247 IX86_BUILTIN_ANDNPS512,
30248 IX86_BUILTIN_INSERTF32X8,
30249 IX86_BUILTIN_INSERTI32X8,
30250 IX86_BUILTIN_INSERTF64X2_512,
30251 IX86_BUILTIN_INSERTI64X2_512,
30252 IX86_BUILTIN_FPCLASSPD512,
30253 IX86_BUILTIN_FPCLASSPS512,
30254 IX86_BUILTIN_CVTD2MASK512,
30255 IX86_BUILTIN_CVTQ2MASK512,
30256 IX86_BUILTIN_CVTMASK2D512,
30257 IX86_BUILTIN_CVTMASK2Q512,
30258 IX86_BUILTIN_CVTPD2QQ512,
30259 IX86_BUILTIN_CVTPS2QQ512,
30260 IX86_BUILTIN_CVTPD2UQQ512,
30261 IX86_BUILTIN_CVTPS2UQQ512,
30262 IX86_BUILTIN_CVTQQ2PS512,
30263 IX86_BUILTIN_CVTUQQ2PS512,
30264 IX86_BUILTIN_CVTQQ2PD512,
30265 IX86_BUILTIN_CVTUQQ2PD512,
30266 IX86_BUILTIN_CVTTPS2QQ512,
30267 IX86_BUILTIN_CVTTPS2UQQ512,
30268 IX86_BUILTIN_CVTTPD2QQ512,
30269 IX86_BUILTIN_CVTTPD2UQQ512,
30270 IX86_BUILTIN_RANGEPS512,
30271 IX86_BUILTIN_RANGEPD512,
30273 /* AVX512BW. */
30274 IX86_BUILTIN_PACKUSDW512,
30275 IX86_BUILTIN_PACKSSDW512,
30276 IX86_BUILTIN_LOADDQUHI512_MASK,
30277 IX86_BUILTIN_LOADDQUQI512_MASK,
30278 IX86_BUILTIN_PSLLDQ512,
30279 IX86_BUILTIN_PSRLDQ512,
30280 IX86_BUILTIN_STOREDQUHI512_MASK,
30281 IX86_BUILTIN_STOREDQUQI512_MASK,
30282 IX86_BUILTIN_PALIGNR512,
30283 IX86_BUILTIN_PALIGNR512_MASK,
30284 IX86_BUILTIN_MOVDQUHI512_MASK,
30285 IX86_BUILTIN_MOVDQUQI512_MASK,
30286 IX86_BUILTIN_PSADBW512,
30287 IX86_BUILTIN_DBPSADBW512,
30288 IX86_BUILTIN_PBROADCASTB512,
30289 IX86_BUILTIN_PBROADCASTB512_GPR,
30290 IX86_BUILTIN_PBROADCASTW512,
30291 IX86_BUILTIN_PBROADCASTW512_GPR,
30292 IX86_BUILTIN_PMOVSXBW512_MASK,
30293 IX86_BUILTIN_PMOVZXBW512_MASK,
30294 IX86_BUILTIN_VPERMVARHI512_MASK,
30295 IX86_BUILTIN_VPERMT2VARHI512,
30296 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30297 IX86_BUILTIN_VPERMI2VARHI512,
30298 IX86_BUILTIN_PAVGB512,
30299 IX86_BUILTIN_PAVGW512,
30300 IX86_BUILTIN_PADDB512,
30301 IX86_BUILTIN_PSUBB512,
30302 IX86_BUILTIN_PSUBSB512,
30303 IX86_BUILTIN_PADDSB512,
30304 IX86_BUILTIN_PSUBUSB512,
30305 IX86_BUILTIN_PADDUSB512,
30306 IX86_BUILTIN_PSUBW512,
30307 IX86_BUILTIN_PADDW512,
30308 IX86_BUILTIN_PSUBSW512,
30309 IX86_BUILTIN_PADDSW512,
30310 IX86_BUILTIN_PSUBUSW512,
30311 IX86_BUILTIN_PADDUSW512,
30312 IX86_BUILTIN_PMAXUW512,
30313 IX86_BUILTIN_PMAXSW512,
30314 IX86_BUILTIN_PMINUW512,
30315 IX86_BUILTIN_PMINSW512,
30316 IX86_BUILTIN_PMAXUB512,
30317 IX86_BUILTIN_PMAXSB512,
30318 IX86_BUILTIN_PMINUB512,
30319 IX86_BUILTIN_PMINSB512,
30320 IX86_BUILTIN_PMOVWB512,
30321 IX86_BUILTIN_PMOVSWB512,
30322 IX86_BUILTIN_PMOVUSWB512,
30323 IX86_BUILTIN_PMULHRSW512_MASK,
30324 IX86_BUILTIN_PMULHUW512_MASK,
30325 IX86_BUILTIN_PMULHW512_MASK,
30326 IX86_BUILTIN_PMULLW512_MASK,
30327 IX86_BUILTIN_PSLLWI512_MASK,
30328 IX86_BUILTIN_PSLLW512_MASK,
30329 IX86_BUILTIN_PACKSSWB512,
30330 IX86_BUILTIN_PACKUSWB512,
30331 IX86_BUILTIN_PSRAVV32HI,
30332 IX86_BUILTIN_PMADDUBSW512_MASK,
30333 IX86_BUILTIN_PMADDWD512_MASK,
30334 IX86_BUILTIN_PSRLVV32HI,
30335 IX86_BUILTIN_PUNPCKHBW512,
30336 IX86_BUILTIN_PUNPCKHWD512,
30337 IX86_BUILTIN_PUNPCKLBW512,
30338 IX86_BUILTIN_PUNPCKLWD512,
30339 IX86_BUILTIN_PSHUFB512,
30340 IX86_BUILTIN_PSHUFHW512,
30341 IX86_BUILTIN_PSHUFLW512,
30342 IX86_BUILTIN_PSRAWI512,
30343 IX86_BUILTIN_PSRAW512,
30344 IX86_BUILTIN_PSRLWI512,
30345 IX86_BUILTIN_PSRLW512,
30346 IX86_BUILTIN_CVTB2MASK512,
30347 IX86_BUILTIN_CVTW2MASK512,
30348 IX86_BUILTIN_CVTMASK2B512,
30349 IX86_BUILTIN_CVTMASK2W512,
30350 IX86_BUILTIN_PCMPEQB512_MASK,
30351 IX86_BUILTIN_PCMPEQW512_MASK,
30352 IX86_BUILTIN_PCMPGTB512_MASK,
30353 IX86_BUILTIN_PCMPGTW512_MASK,
30354 IX86_BUILTIN_PTESTMB512,
30355 IX86_BUILTIN_PTESTMW512,
30356 IX86_BUILTIN_PTESTNMB512,
30357 IX86_BUILTIN_PTESTNMW512,
30358 IX86_BUILTIN_PSLLVV32HI,
30359 IX86_BUILTIN_PABSB512,
30360 IX86_BUILTIN_PABSW512,
30361 IX86_BUILTIN_BLENDMW512,
30362 IX86_BUILTIN_BLENDMB512,
30363 IX86_BUILTIN_CMPB512,
30364 IX86_BUILTIN_CMPW512,
30365 IX86_BUILTIN_UCMPB512,
30366 IX86_BUILTIN_UCMPW512,
30368 /* Alternate 4 and 8 element gather/scatter for the vectorizer
30369 where all operands are 32-byte or 64-byte wide respectively. */
30370 IX86_BUILTIN_GATHERALTSIV4DF,
30371 IX86_BUILTIN_GATHERALTDIV8SF,
30372 IX86_BUILTIN_GATHERALTSIV4DI,
30373 IX86_BUILTIN_GATHERALTDIV8SI,
30374 IX86_BUILTIN_GATHER3ALTDIV16SF,
30375 IX86_BUILTIN_GATHER3ALTDIV16SI,
30376 IX86_BUILTIN_GATHER3ALTSIV4DF,
30377 IX86_BUILTIN_GATHER3ALTDIV8SF,
30378 IX86_BUILTIN_GATHER3ALTSIV4DI,
30379 IX86_BUILTIN_GATHER3ALTDIV8SI,
30380 IX86_BUILTIN_GATHER3ALTSIV8DF,
30381 IX86_BUILTIN_GATHER3ALTSIV8DI,
30382 IX86_BUILTIN_GATHER3DIV16SF,
30383 IX86_BUILTIN_GATHER3DIV16SI,
30384 IX86_BUILTIN_GATHER3DIV8DF,
30385 IX86_BUILTIN_GATHER3DIV8DI,
30386 IX86_BUILTIN_GATHER3SIV16SF,
30387 IX86_BUILTIN_GATHER3SIV16SI,
30388 IX86_BUILTIN_GATHER3SIV8DF,
30389 IX86_BUILTIN_GATHER3SIV8DI,
30390 IX86_BUILTIN_SCATTERDIV16SF,
30391 IX86_BUILTIN_SCATTERDIV16SI,
30392 IX86_BUILTIN_SCATTERDIV8DF,
30393 IX86_BUILTIN_SCATTERDIV8DI,
30394 IX86_BUILTIN_SCATTERSIV16SF,
30395 IX86_BUILTIN_SCATTERSIV16SI,
30396 IX86_BUILTIN_SCATTERSIV8DF,
30397 IX86_BUILTIN_SCATTERSIV8DI,
30399 /* AVX512PF */
30400 IX86_BUILTIN_GATHERPFQPD,
30401 IX86_BUILTIN_GATHERPFDPS,
30402 IX86_BUILTIN_GATHERPFDPD,
30403 IX86_BUILTIN_GATHERPFQPS,
30404 IX86_BUILTIN_SCATTERPFDPD,
30405 IX86_BUILTIN_SCATTERPFDPS,
30406 IX86_BUILTIN_SCATTERPFQPD,
30407 IX86_BUILTIN_SCATTERPFQPS,
30409 /* AVX-512ER */
30410 IX86_BUILTIN_EXP2PD_MASK,
30411 IX86_BUILTIN_EXP2PS_MASK,
30412 IX86_BUILTIN_EXP2PS,
30413 IX86_BUILTIN_RCP28PD,
30414 IX86_BUILTIN_RCP28PS,
30415 IX86_BUILTIN_RCP28SD,
30416 IX86_BUILTIN_RCP28SS,
30417 IX86_BUILTIN_RSQRT28PD,
30418 IX86_BUILTIN_RSQRT28PS,
30419 IX86_BUILTIN_RSQRT28SD,
30420 IX86_BUILTIN_RSQRT28SS,
30422 /* AVX-512IFMA */
30423 IX86_BUILTIN_VPMADD52LUQ512,
30424 IX86_BUILTIN_VPMADD52HUQ512,
30425 IX86_BUILTIN_VPMADD52LUQ256,
30426 IX86_BUILTIN_VPMADD52HUQ256,
30427 IX86_BUILTIN_VPMADD52LUQ128,
30428 IX86_BUILTIN_VPMADD52HUQ128,
30429 IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30430 IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30431 IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30432 IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30433 IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30434 IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30436 /* AVX-512VBMI */
30437 IX86_BUILTIN_VPMULTISHIFTQB512,
30438 IX86_BUILTIN_VPMULTISHIFTQB256,
30439 IX86_BUILTIN_VPMULTISHIFTQB128,
30440 IX86_BUILTIN_VPERMVARQI512_MASK,
30441 IX86_BUILTIN_VPERMT2VARQI512,
30442 IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30443 IX86_BUILTIN_VPERMI2VARQI512,
30444 IX86_BUILTIN_VPERMVARQI256_MASK,
30445 IX86_BUILTIN_VPERMVARQI128_MASK,
30446 IX86_BUILTIN_VPERMT2VARQI256,
30447 IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30448 IX86_BUILTIN_VPERMT2VARQI128,
30449 IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30450 IX86_BUILTIN_VPERMI2VARQI256,
30451 IX86_BUILTIN_VPERMI2VARQI128,
30453 /* SHA builtins. */
30454 IX86_BUILTIN_SHA1MSG1,
30455 IX86_BUILTIN_SHA1MSG2,
30456 IX86_BUILTIN_SHA1NEXTE,
30457 IX86_BUILTIN_SHA1RNDS4,
30458 IX86_BUILTIN_SHA256MSG1,
30459 IX86_BUILTIN_SHA256MSG2,
30460 IX86_BUILTIN_SHA256RNDS2,
30462 /* CLWB instructions. */
30463 IX86_BUILTIN_CLWB,
30465 /* PCOMMIT instructions. */
30466 IX86_BUILTIN_PCOMMIT,
30468 /* CLFLUSHOPT instructions. */
30469 IX86_BUILTIN_CLFLUSHOPT,
30471 /* TFmode support builtins. */
30472 IX86_BUILTIN_INFQ,
30473 IX86_BUILTIN_HUGE_VALQ,
30474 IX86_BUILTIN_FABSQ,
30475 IX86_BUILTIN_COPYSIGNQ,
30477 /* Vectorizer support builtins. */
30478 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30479 IX86_BUILTIN_CPYSGNPS,
30480 IX86_BUILTIN_CPYSGNPD,
30481 IX86_BUILTIN_CPYSGNPS256,
30482 IX86_BUILTIN_CPYSGNPS512,
30483 IX86_BUILTIN_CPYSGNPD256,
30484 IX86_BUILTIN_CPYSGNPD512,
30485 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30486 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30489 /* FMA4 instructions. */
30490 IX86_BUILTIN_VFMADDSS,
30491 IX86_BUILTIN_VFMADDSD,
30492 IX86_BUILTIN_VFMADDPS,
30493 IX86_BUILTIN_VFMADDPD,
30494 IX86_BUILTIN_VFMADDPS256,
30495 IX86_BUILTIN_VFMADDPD256,
30496 IX86_BUILTIN_VFMADDSUBPS,
30497 IX86_BUILTIN_VFMADDSUBPD,
30498 IX86_BUILTIN_VFMADDSUBPS256,
30499 IX86_BUILTIN_VFMADDSUBPD256,
30501 /* FMA3 instructions. */
30502 IX86_BUILTIN_VFMADDSS3,
30503 IX86_BUILTIN_VFMADDSD3,
30505 /* XOP instructions. */
30506 IX86_BUILTIN_VPCMOV,
30507 IX86_BUILTIN_VPCMOV_V2DI,
30508 IX86_BUILTIN_VPCMOV_V4SI,
30509 IX86_BUILTIN_VPCMOV_V8HI,
30510 IX86_BUILTIN_VPCMOV_V16QI,
30511 IX86_BUILTIN_VPCMOV_V4SF,
30512 IX86_BUILTIN_VPCMOV_V2DF,
30513 IX86_BUILTIN_VPCMOV256,
30514 IX86_BUILTIN_VPCMOV_V4DI256,
30515 IX86_BUILTIN_VPCMOV_V8SI256,
30516 IX86_BUILTIN_VPCMOV_V16HI256,
30517 IX86_BUILTIN_VPCMOV_V32QI256,
30518 IX86_BUILTIN_VPCMOV_V8SF256,
30519 IX86_BUILTIN_VPCMOV_V4DF256,
30521 IX86_BUILTIN_VPPERM,
30523 IX86_BUILTIN_VPMACSSWW,
30524 IX86_BUILTIN_VPMACSWW,
30525 IX86_BUILTIN_VPMACSSWD,
30526 IX86_BUILTIN_VPMACSWD,
30527 IX86_BUILTIN_VPMACSSDD,
30528 IX86_BUILTIN_VPMACSDD,
30529 IX86_BUILTIN_VPMACSSDQL,
30530 IX86_BUILTIN_VPMACSSDQH,
30531 IX86_BUILTIN_VPMACSDQL,
30532 IX86_BUILTIN_VPMACSDQH,
30533 IX86_BUILTIN_VPMADCSSWD,
30534 IX86_BUILTIN_VPMADCSWD,
30536 IX86_BUILTIN_VPHADDBW,
30537 IX86_BUILTIN_VPHADDBD,
30538 IX86_BUILTIN_VPHADDBQ,
30539 IX86_BUILTIN_VPHADDWD,
30540 IX86_BUILTIN_VPHADDWQ,
30541 IX86_BUILTIN_VPHADDDQ,
30542 IX86_BUILTIN_VPHADDUBW,
30543 IX86_BUILTIN_VPHADDUBD,
30544 IX86_BUILTIN_VPHADDUBQ,
30545 IX86_BUILTIN_VPHADDUWD,
30546 IX86_BUILTIN_VPHADDUWQ,
30547 IX86_BUILTIN_VPHADDUDQ,
30548 IX86_BUILTIN_VPHSUBBW,
30549 IX86_BUILTIN_VPHSUBWD,
30550 IX86_BUILTIN_VPHSUBDQ,
30552 IX86_BUILTIN_VPROTB,
30553 IX86_BUILTIN_VPROTW,
30554 IX86_BUILTIN_VPROTD,
30555 IX86_BUILTIN_VPROTQ,
30556 IX86_BUILTIN_VPROTB_IMM,
30557 IX86_BUILTIN_VPROTW_IMM,
30558 IX86_BUILTIN_VPROTD_IMM,
30559 IX86_BUILTIN_VPROTQ_IMM,
30561 IX86_BUILTIN_VPSHLB,
30562 IX86_BUILTIN_VPSHLW,
30563 IX86_BUILTIN_VPSHLD,
30564 IX86_BUILTIN_VPSHLQ,
30565 IX86_BUILTIN_VPSHAB,
30566 IX86_BUILTIN_VPSHAW,
30567 IX86_BUILTIN_VPSHAD,
30568 IX86_BUILTIN_VPSHAQ,
30570 IX86_BUILTIN_VFRCZSS,
30571 IX86_BUILTIN_VFRCZSD,
30572 IX86_BUILTIN_VFRCZPS,
30573 IX86_BUILTIN_VFRCZPD,
30574 IX86_BUILTIN_VFRCZPS256,
30575 IX86_BUILTIN_VFRCZPD256,
30577 IX86_BUILTIN_VPCOMEQUB,
30578 IX86_BUILTIN_VPCOMNEUB,
30579 IX86_BUILTIN_VPCOMLTUB,
30580 IX86_BUILTIN_VPCOMLEUB,
30581 IX86_BUILTIN_VPCOMGTUB,
30582 IX86_BUILTIN_VPCOMGEUB,
30583 IX86_BUILTIN_VPCOMFALSEUB,
30584 IX86_BUILTIN_VPCOMTRUEUB,
30586 IX86_BUILTIN_VPCOMEQUW,
30587 IX86_BUILTIN_VPCOMNEUW,
30588 IX86_BUILTIN_VPCOMLTUW,
30589 IX86_BUILTIN_VPCOMLEUW,
30590 IX86_BUILTIN_VPCOMGTUW,
30591 IX86_BUILTIN_VPCOMGEUW,
30592 IX86_BUILTIN_VPCOMFALSEUW,
30593 IX86_BUILTIN_VPCOMTRUEUW,
30595 IX86_BUILTIN_VPCOMEQUD,
30596 IX86_BUILTIN_VPCOMNEUD,
30597 IX86_BUILTIN_VPCOMLTUD,
30598 IX86_BUILTIN_VPCOMLEUD,
30599 IX86_BUILTIN_VPCOMGTUD,
30600 IX86_BUILTIN_VPCOMGEUD,
30601 IX86_BUILTIN_VPCOMFALSEUD,
30602 IX86_BUILTIN_VPCOMTRUEUD,
30604 IX86_BUILTIN_VPCOMEQUQ,
30605 IX86_BUILTIN_VPCOMNEUQ,
30606 IX86_BUILTIN_VPCOMLTUQ,
30607 IX86_BUILTIN_VPCOMLEUQ,
30608 IX86_BUILTIN_VPCOMGTUQ,
30609 IX86_BUILTIN_VPCOMGEUQ,
30610 IX86_BUILTIN_VPCOMFALSEUQ,
30611 IX86_BUILTIN_VPCOMTRUEUQ,
30613 IX86_BUILTIN_VPCOMEQB,
30614 IX86_BUILTIN_VPCOMNEB,
30615 IX86_BUILTIN_VPCOMLTB,
30616 IX86_BUILTIN_VPCOMLEB,
30617 IX86_BUILTIN_VPCOMGTB,
30618 IX86_BUILTIN_VPCOMGEB,
30619 IX86_BUILTIN_VPCOMFALSEB,
30620 IX86_BUILTIN_VPCOMTRUEB,
30622 IX86_BUILTIN_VPCOMEQW,
30623 IX86_BUILTIN_VPCOMNEW,
30624 IX86_BUILTIN_VPCOMLTW,
30625 IX86_BUILTIN_VPCOMLEW,
30626 IX86_BUILTIN_VPCOMGTW,
30627 IX86_BUILTIN_VPCOMGEW,
30628 IX86_BUILTIN_VPCOMFALSEW,
30629 IX86_BUILTIN_VPCOMTRUEW,
30631 IX86_BUILTIN_VPCOMEQD,
30632 IX86_BUILTIN_VPCOMNED,
30633 IX86_BUILTIN_VPCOMLTD,
30634 IX86_BUILTIN_VPCOMLED,
30635 IX86_BUILTIN_VPCOMGTD,
30636 IX86_BUILTIN_VPCOMGED,
30637 IX86_BUILTIN_VPCOMFALSED,
30638 IX86_BUILTIN_VPCOMTRUED,
30640 IX86_BUILTIN_VPCOMEQQ,
30641 IX86_BUILTIN_VPCOMNEQ,
30642 IX86_BUILTIN_VPCOMLTQ,
30643 IX86_BUILTIN_VPCOMLEQ,
30644 IX86_BUILTIN_VPCOMGTQ,
30645 IX86_BUILTIN_VPCOMGEQ,
30646 IX86_BUILTIN_VPCOMFALSEQ,
30647 IX86_BUILTIN_VPCOMTRUEQ,
30649 /* LWP instructions. */
30650 IX86_BUILTIN_LLWPCB,
30651 IX86_BUILTIN_SLWPCB,
30652 IX86_BUILTIN_LWPVAL32,
30653 IX86_BUILTIN_LWPVAL64,
30654 IX86_BUILTIN_LWPINS32,
30655 IX86_BUILTIN_LWPINS64,
30657 IX86_BUILTIN_CLZS,
30659 /* RTM */
30660 IX86_BUILTIN_XBEGIN,
30661 IX86_BUILTIN_XEND,
30662 IX86_BUILTIN_XABORT,
30663 IX86_BUILTIN_XTEST,
30665 /* MPX */
30666 IX86_BUILTIN_BNDMK,
30667 IX86_BUILTIN_BNDSTX,
30668 IX86_BUILTIN_BNDLDX,
30669 IX86_BUILTIN_BNDCL,
30670 IX86_BUILTIN_BNDCU,
30671 IX86_BUILTIN_BNDRET,
30672 IX86_BUILTIN_BNDNARROW,
30673 IX86_BUILTIN_BNDINT,
30674 IX86_BUILTIN_SIZEOF,
30675 IX86_BUILTIN_BNDLOWER,
30676 IX86_BUILTIN_BNDUPPER,
30678 /* BMI instructions. */
30679 IX86_BUILTIN_BEXTR32,
30680 IX86_BUILTIN_BEXTR64,
30681 IX86_BUILTIN_CTZS,
30683 /* TBM instructions. */
30684 IX86_BUILTIN_BEXTRI32,
30685 IX86_BUILTIN_BEXTRI64,
30687 /* BMI2 instructions. */
30688 IX86_BUILTIN_BZHI32,
30689 IX86_BUILTIN_BZHI64,
30690 IX86_BUILTIN_PDEP32,
30691 IX86_BUILTIN_PDEP64,
30692 IX86_BUILTIN_PEXT32,
30693 IX86_BUILTIN_PEXT64,
30695 /* ADX instructions. */
30696 IX86_BUILTIN_ADDCARRYX32,
30697 IX86_BUILTIN_ADDCARRYX64,
30699 /* SBB instructions. */
30700 IX86_BUILTIN_SBB32,
30701 IX86_BUILTIN_SBB64,
30703 /* FSGSBASE instructions. */
30704 IX86_BUILTIN_RDFSBASE32,
30705 IX86_BUILTIN_RDFSBASE64,
30706 IX86_BUILTIN_RDGSBASE32,
30707 IX86_BUILTIN_RDGSBASE64,
30708 IX86_BUILTIN_WRFSBASE32,
30709 IX86_BUILTIN_WRFSBASE64,
30710 IX86_BUILTIN_WRGSBASE32,
30711 IX86_BUILTIN_WRGSBASE64,
30713 /* RDRND instructions. */
30714 IX86_BUILTIN_RDRAND16_STEP,
30715 IX86_BUILTIN_RDRAND32_STEP,
30716 IX86_BUILTIN_RDRAND64_STEP,
30718 /* RDSEED instructions. */
30719 IX86_BUILTIN_RDSEED16_STEP,
30720 IX86_BUILTIN_RDSEED32_STEP,
30721 IX86_BUILTIN_RDSEED64_STEP,
30723 /* F16C instructions. */
30724 IX86_BUILTIN_CVTPH2PS,
30725 IX86_BUILTIN_CVTPH2PS256,
30726 IX86_BUILTIN_CVTPS2PH,
30727 IX86_BUILTIN_CVTPS2PH256,
30729 /* MONITORX and MWAITX instructions. */
30730 IX86_BUILTIN_MONITORX,
30731 IX86_BUILTIN_MWAITX,
30733 /* CFString built-in for darwin */
30734 IX86_BUILTIN_CFSTRING,
30736 /* Builtins to get CPU type and supported features. */
30737 IX86_BUILTIN_CPU_INIT,
30738 IX86_BUILTIN_CPU_IS,
30739 IX86_BUILTIN_CPU_SUPPORTS,
30741 /* Read/write FLAGS register built-ins. */
30742 IX86_BUILTIN_READ_FLAGS,
30743 IX86_BUILTIN_WRITE_FLAGS,
30745 IX86_BUILTIN_MAX
30746 };
30748 /* Table for the ix86 builtin decls. */
30749 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30751 /* Table of all of the builtin functions that are possible with different ISAs,
30752 but are waiting to be built until a function is declared to use that
30753 ISA.  */
30754 struct builtin_isa {
30755 const char *name; /* function name */
30756 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30757 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
30758 bool const_p; /* true if the declaration is constant */
30759 bool leaf_p; /* true if the declaration has leaf attribute */
30760 bool nothrow_p; /* true if the declaration has nothrow attribute */
30761 bool set_and_not_built_p; /* true if the builtin is recorded but its decl is not yet built */
30762 };
30764 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
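/* Illustrative note, not from the original source: while a builtin is
   deferred, its ix86_builtins_isa slot records everything needed to build
   the decl later.  The name, function type and enum value below are
   hypothetical:

     { "__builtin_ia32_example",     name
       V8SI_FTYPE_V8SI_V8SI,         tcode
       OPTION_MASK_ISA_AVX2,         isa
       true, false, false,           const_p, leaf_p, nothrow_p
       true }                        set_and_not_built_p

   The matching ix86_builtins entry stays NULL_TREE until the ISA is
   enabled and ix86_add_new_builtins builds the decl.  */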
30766 /* ISA bits that can still cause a deferred builtin to be defined. */
30767 static HOST_WIDE_INT deferred_isa_values = 0;
30769 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
30770 of isa_flags it requires in the ix86_builtins_isa array.  Store the
30771 function decl in the ix86_builtins array.  Return the function decl, or
30772 NULL_TREE if the builtin was not added.
30774 If the front end has a special hook for builtin functions, delay adding
30775 builtin functions that aren't in the current ISA until the ISA is changed
30776 by function-specific optimization.  Doing so can save about 300K for the
30777 default compiler.  Whether the builtin is valid is then checked when it
30778 is expanded.
30780 If the front end doesn't have a special hook, record all builtins, even
30781 those whose instruction set isn't in the current ISA, in case the user uses
30782 function-specific options for a different ISA; that way we don't get scope
30783 errors if a builtin is added in the middle of a function scope. */
30785 static inline tree
30786 def_builtin (HOST_WIDE_INT mask, const char *name,
30787 enum ix86_builtin_func_type tcode,
30788 enum ix86_builtins code)
30789 {
30790 tree decl = NULL_TREE;
30792 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30793 {
30794 ix86_builtins_isa[(int) code].isa = mask;
30796 mask &= ~OPTION_MASK_ISA_64BIT;
30797 if (mask == 0
30798 || (mask & ix86_isa_flags) != 0
30799 || (lang_hooks.builtin_function
30800 == lang_hooks.builtin_function_ext_scope))
30802 {
30803 tree type = ix86_get_builtin_func_type (tcode);
30804 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30805 NULL, NULL_TREE);
30806 ix86_builtins[(int) code] = decl;
30807 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30808 }
30809 else
30810 {
30811 /* Only a MASK whose entry still has set_and_not_built_p == true
30812 can cause this builtin to be defined later.  */
30813 deferred_isa_values |= mask;
30814 ix86_builtins[(int) code] = NULL_TREE;
30815 ix86_builtins_isa[(int) code].tcode = tcode;
30816 ix86_builtins_isa[(int) code].name = name;
30817 ix86_builtins_isa[(int) code].leaf_p = false;
30818 ix86_builtins_isa[(int) code].nothrow_p = false;
30819 ix86_builtins_isa[(int) code].const_p = false;
30820 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30821 }
30822 }
30824 return decl;
30825 }
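/* Illustrative sketch, not from the original source: a typical registration
   through this helper.  The builtin name, function type and enum value are
   hypothetical:

     def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_example",
                  V8SI_FTYPE_V8SI_V8SI, IX86_BUILTIN_EXAMPLE);

   If OPTION_MASK_ISA_AVX2 is not in ix86_isa_flags and the front end supports
   extended-scope builtins, no decl is created yet: the mask is accumulated in
   deferred_isa_values and the entry waits in ix86_builtins_isa until
   ix86_add_new_builtins is called with an ISA that covers it.  */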
30827 /* Like def_builtin, but also marks the function decl "const". */
30829 static inline tree
30830 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30831 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30832 {
30833 tree decl = def_builtin (mask, name, tcode, code);
30834 if (decl)
30835 TREE_READONLY (decl) = 1;
30836 else
30837 ix86_builtins_isa[(int) code].const_p = true;
30839 return decl;
30840 }
30842 /* Add any new builtin functions for a given ISA that have not yet been
30843 declared.  This saves a bit of space compared with adding all of the
30844 declarations to the tree up front, whether or not they are used.  */
30846 static void
30847 ix86_add_new_builtins (HOST_WIDE_INT isa)
30848 {
30849 if ((isa & deferred_isa_values) == 0)
30850 return;
30852 /* These ISA bits are handled now, so remove them from the deferred isa values. */
30853 deferred_isa_values &= ~isa;
30855 int i;
30856 tree saved_current_target_pragma = current_target_pragma;
30857 current_target_pragma = NULL_TREE;
30859 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30860 {
30861 if ((ix86_builtins_isa[i].isa & isa) != 0
30862 && ix86_builtins_isa[i].set_and_not_built_p)
30863 {
30864 tree decl, type;
30866 /* Don't define the builtin again. */
30867 ix86_builtins_isa[i].set_and_not_built_p = false;
30869 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30870 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30871 type, i, BUILT_IN_MD, NULL,
30872 NULL_TREE);
30874 ix86_builtins[i] = decl;
30875 if (ix86_builtins_isa[i].const_p)
30876 TREE_READONLY (decl) = 1;
30877 if (ix86_builtins_isa[i].leaf_p)
30878 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30879 NULL_TREE);
30880 if (ix86_builtins_isa[i].nothrow_p)
30881 TREE_NOTHROW (decl) = 1;
30882 }
30883 }
30885 current_target_pragma = saved_current_target_pragma;
30886 }
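/* Illustrative note, not from the original source: this function is expected
   to run when the active ISA grows, e.g. when a target ("avx2") attribute or
   pragma adds OPTION_MASK_ISA_AVX2 to the ISA flags.  A sketch of the call,
   with the mask chosen only for illustration:

     ix86_add_new_builtins (OPTION_MASK_ISA_AVX2);

   Every ix86_builtins_isa entry whose isa bits overlap the argument and whose
   set_and_not_built_p flag is still true is then built exactly once via
   add_builtin_function_ext_scope.  */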
30888 /* Bits for builtin_description.flag. */
30890 /* Set when we don't support the comparison natively, and should
30891 swap_comparison in order to support it. */
30892 #define BUILTIN_DESC_SWAP_OPERANDS 1
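/* Illustrative sketch, not from the original source: an expander that honors
   this flag would do roughly

     if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
       std::swap (op0, op1);

   so that a comparison with no native pattern, e.g. "a < b", is emitted as
   its mirror image "b > a" using the pattern that does exist.  */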
30894 struct builtin_description
30895 {
30896 const HOST_WIDE_INT mask;
30897 const enum insn_code icode;
30898 const char *const name;
30899 const enum ix86_builtins code;
30900 const enum rtx_code comparison;
30901 const int flag;
30902 };
30904 static const struct builtin_description bdesc_comi[] =
30905 {
30906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30907 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30908 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30909 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30910 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30911 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30912 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30913 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30914 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30915 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30916 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30917 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30918 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30919 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30920 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30921 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30922 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30923 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30924 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30925 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30926 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30927 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30928 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30929 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30930 };
30932 static const struct builtin_description bdesc_pcmpestr[] =
30933 {
30934 /* SSE4.2 */
30935 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30936 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30937 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30938 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30939 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30940 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30941 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30942 };
30944 static const struct builtin_description bdesc_pcmpistr[] =
30945 {
30946 /* SSE4.2 */
30947 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30948 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30949 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30950 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30951 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30952 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30953 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30954 };
30956 /* Special builtins with variable number of arguments. */
30957 static const struct builtin_description bdesc_special_args[] =
30958 {
30959 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30960 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30961 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30963 /* 80387 (for use internally for atomic compound assignment). */
30964 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30965 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30966 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30967 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30969 /* MMX */
30970 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30972 /* 3DNow! */
30973 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30975 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30976 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30977 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30978 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30979 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30980 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30981 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30982 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30983 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30985 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30986 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30987 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30988 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30989 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30990 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30991 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30992 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30994 /* SSE */
30995 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30996 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30997 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31000 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31001 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31004 /* SSE or 3DNow!A */
31005 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31006 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
31008 /* SSE2 */
31009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
31013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
31015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
31016 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
31017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
31018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31023 /* SSE3 */
31024 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31026 /* SSE4.1 */
31027 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
31029 /* SSE4A */
31030 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31031 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31033 /* AVX */
31034 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
31035 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
31037 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31038 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31039 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31040 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
31041 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
31043 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31044 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31045 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31046 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31047 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31048 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
31049 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31051 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
31052 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31055 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
31056 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
31057 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
31058 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
31059 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
31060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
31061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
31062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
31064 /* AVX2 */
31065 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
31066 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
31067 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
31068 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
31069 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
31070 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
31071 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
31072 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
31073 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
31075 /* AVX512F */
31076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
31097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
31098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
31099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
31100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31124 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
31125 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
31126 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
31127 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
31128 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
31129 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
31131 /* FSGSBASE */
31132 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31133 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31134 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31135 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31136 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31137 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31138 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31139 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31141 /* RTM */
31142 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31143 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
31144 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
31146 /* AVX512BW */
31147 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
31148 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
31149 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
31150 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
31152 /* AVX512VL */
31153 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
31154 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
31155 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
31156 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
31157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31162 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31163 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31164 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31165 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31181 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31182 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31183 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31184 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31189 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31190 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31191 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31192 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31193 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31194 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31197 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31198 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31199 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31200 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31201 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31202 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31203 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31204 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31205 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31206 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31207 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31208 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31209 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31210 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31211 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31212 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31213 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31214 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31215 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31216 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
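  /* Naming note on the expand-load entries above: the "_mask" forms
     merge-mask, keeping elements of the pass-through vector operand where
     the corresponding mask bit is clear, while the "_maskz" forms zero
     those elements; both take a pointer source, a pass-through vector and
     a QImode element mask, matching the AVX-512 merge- and zero-masking
     conventions.  */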
31217 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31218 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31219 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31220 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31221 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31222 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31223 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31224 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31225 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31226 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31227 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31228 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31229 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31230 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31231 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31232 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31233 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31234 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31235 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31236 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31237 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31238 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31239 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31240 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31241 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31242 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31243 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31244 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31245 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31246 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
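  /* The pmov*mem entries above are the memory-destination forms of the
     VPMOV down-converting stores: the plain variants truncate, the "ss_"
     patterns saturate as signed and the "us_" patterns saturate as
     unsigned, each taking a destination pointer, the source vector and an
     element mask.  */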
31248 /* PCOMMIT. */
31249 { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31250 };
31252 /* Builtins with variable number of arguments.  */
31253 static const struct builtin_description bdesc_args[] =
31254 {
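  /* Each entry below pairs the ISA option mask that guards the builtin
     with the insn code used to expand it, the user-visible builtin name,
     its IX86_BUILTIN_* enumerator, an rtx comparison code (UNKNOWN when
     the expander does not need one) and the prototype/flag value cast to
     int.  As an illustrative sketch (the wrapper name is hypothetical,
     only the builtin is real), the ADDPS entry backs user code such as:

	 typedef float __v4sf __attribute__ ((__vector_size__ (16)));
	 __v4sf add4 (__v4sf a, __v4sf b) { return __builtin_ia32_addps (a, b); }

     which is expanded through CODE_FOR_addv4sf3.  */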
31255 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31256 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31257 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31258 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31259 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31260 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31261 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31263 /* MMX */
31264 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31265 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31266 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31267 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31268 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31269 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31271 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31272 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31273 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31274 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31275 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31276 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31277 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31278 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31280 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31281 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31283 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31284 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31285 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31286 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31288 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31289 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31290 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31291 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31292 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31295 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31296 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31297 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31298 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31299 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31300 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31303 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31306 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31308 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31309 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31310 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31311 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31312 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31313 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31315 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31316 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31317 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31318 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31319 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31320 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31322 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31323 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31324 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31325 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
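  /* The MMX shift entries above come in pairs that share one insn
     pattern: the "...i" builtins (psllwi, pslldi, psrlqi, ...) take the
     shift count as an integer (the _SI_COUNT prototypes), while the
     unsuffixed forms take it in an MMX register (the _V4HI_COUNT,
     _V2SI_COUNT and _V1DI_COUNT prototypes).  */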
31327 /* 3DNow! */
31328 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31329 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31330 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31331 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31333 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31334 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31335 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31336 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31337 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31338 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31339 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31340 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31341 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31342 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31343 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31344 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31345 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31346 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31347 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31349 /* 3DNow!A */
31350 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31351 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31352 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31353 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31354 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31355 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31357 /* SSE */
31358 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31359 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31360 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31361 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31362 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31363 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31364 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31365 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31366 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31367 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31368 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31369 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31371 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31373 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31374 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31375 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31376 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31378 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31379 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31380 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31382 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31383 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31384 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31385 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31386 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31387 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31388 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31389 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31390 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31391 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31392 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31393 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31394 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31395 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31396 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31397 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31398 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31400 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31401 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
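  /* All of the SSE compare builtins above expand through the same mask
     compare patterns; the predicate sits in the rtx-code column, and the
     GT/GE (and NGT/NGE) forms reuse LT/LE (UNGE/UNGT) together with the
     _SWAP prototype suffix, which tells the expander to exchange the two
     operands.  */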
31403 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31404 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31408 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31409 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31410 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31411 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31413 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31415 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31416 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31417 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31418 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31419 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31421 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31422 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31423 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31425 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31427 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31428 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31431 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31432 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31434 /* SSE MMX or 3DNow!A */
31435 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31436 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31437 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31439 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31440 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31441 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31442 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31444 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31445 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31447 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31449 /* SSE2 */
31450 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31456 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31468 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31469 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31473 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31475 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31476 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31477 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31478 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31502 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31503 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31505 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31506 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31507 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31510 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31512 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31513 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31515 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31518 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31519 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31521 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31523 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31524 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31525 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31526 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31527 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31528 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31529 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31530 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31541 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31542 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31544 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31545 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31546 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31547 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31559 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31560 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31561 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31564 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31565 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31566 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31567 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31568 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31569 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31570 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31571 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
31577 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
31580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
31581 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
31585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
31586 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
31587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
31588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
31590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31591 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31592 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31593 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31594 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31595 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31596 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
31599 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31600 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31601 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
31602 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31603 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31604 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
31606 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
31607 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
31608 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
31609 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
31611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
31612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
31615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
31617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31619 /* SSE2 MMX */
31620 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31621 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
31623 /* SSE3 */
31624 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31625 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31627 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31628 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31629 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31630 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31631 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31632 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31634 /* SSSE3 */
31635 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31636 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
31637 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31638 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
31639 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31640 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31642 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31643 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31644 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31645 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31646 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31647 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31648 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31649 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31650 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31651 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31652 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31653 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31654 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
31655 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
31656 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31657 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31658 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31659 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31660 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31661 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31662 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31663 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31664 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31665 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31667 /* SSSE3. */
31668 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
31669 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
31671 /* SSE4.1 */
31672 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31673 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31674 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
31675 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
31676 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31677 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31678 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31679 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
31680 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
31681 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
31683 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31684 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31685 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31686 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31687 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31688 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31689 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
31690 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
31691 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
31692 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
31693 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
31694 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
31695 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31697 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
31698 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31699 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31700 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31701 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31702 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31703 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
31704 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31705 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31706 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
31707 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
31708 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31710 /* SSE4.1 */
31711 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31712 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31713 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31714 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31716 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
31717 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
31718 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
31719 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
31721 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31722 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
31724 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31725 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
31727 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
31728 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
31729 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
31730 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
31732 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
31733 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
31735 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31736 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31738 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31739 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
31740 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
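/* For the rounding builtins above, the comparison field is reused to carry
   a ROUND_* constant (ROUND_FLOOR, ROUND_CEIL, ROUND_TRUNC, ROUND_MXCSR),
   which the expander turns into the rounding-control immediate of the
   roundpd/roundps pattern.  For the ptest builtins it is a real comparison
   code selecting which flag the result is read from: EQ for ptestz (ZF),
   LTU for ptestc (CF), GTU for ptestnzc (neither flag set).  */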
31742 /* SSE4.2 */
31743 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31744 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
31745 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
31746 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31747 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31749 /* SSE4A */
31750 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
31751 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
31752 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
31753 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
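/* In the SSE4A rows, extrqi/insertqi take the bit-field length and index as
   the UINT immediates, while extrq/insertq expect the same information
   packed into the second XMM operand.  */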
31755 /* AES */
31756 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
31757 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31759 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31760 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31761 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31762 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31764 /* PCLMUL */
31765 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
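/* The AES and PCLMUL rows have a null name on purpose: the user-visible
   __builtin_ia32_aes*128 and __builtin_ia32_pclmulqdq128 builtins are
   declared separately (they additionally require -maes / -mpclmul), so
   these entries only supply the insn pattern and prototype used when such
   a call is expanded.  */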
31767 /* AVX */
31768 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31769 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31772 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31773 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31776 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31781 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31782 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31783 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31784 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31785 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31786 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31787 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31788 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31789 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31790 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31791 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31792 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31793 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
31796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
31797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
31798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
31803 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
31804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
31814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
31815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
31816 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
31817 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
31818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
31819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31820 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
31821 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
31823 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
31825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31837 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31839 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31841 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31843 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
31855 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31856 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31869 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31870 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31880 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31881 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31882 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31888 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31896 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31903 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31904 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31906 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
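/* Illustration only (not a table entry; the names below are just for the
   example): a builtin listed here can be called directly once its ISA is
   enabled, e.g. with -mavx

     typedef double v4df __attribute__ ((__vector_size__ (32)));

     v4df
     add256 (v4df a, v4df b)
     {
       return __builtin_ia32_addpd256 (a, b);
     }

   expands through the addv4df3 pattern recorded in the
   __builtin_ia32_addpd256 row above.  */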
31908 /* AVX2 */
31909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31910 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31911 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31912 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31913 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31914 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31915 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31916 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31917 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31918 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31919 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31920 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31925 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31926 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31931 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31933 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31934 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31936 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31948 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31949 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31950 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31951 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31952 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31953 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31954 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31955 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31956 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31957 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31958 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31959 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31973 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31975 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31976 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31977 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31978 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31979 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31980 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31981 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31982 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31983 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31984 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31985 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31986 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31987 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31988 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31989 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31990 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31991 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31992 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31993 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31994 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31995 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31996 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31997 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31998 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31999 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32000 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
32001 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32002 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32003 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32004 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32005 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
32006 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
32007 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32008 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32009 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32010 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32011 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32012 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32013 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32014 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32015 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32016 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32017 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32018 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32019 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32020 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32021 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32022 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32023 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32024 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32025 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
32026 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
32027 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32028 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32029 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
32030 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
32031 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
32032 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
32033 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32034 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
32035 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
32036 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
32037 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
32038 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32039 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
32040 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
32041 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
32042 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
32043 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
32044 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
32045 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32046 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32047 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32048 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32049 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32050 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32051 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32052 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32053 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32054 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32056 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
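 /* Reading aid, not part of the table proper: each initializer in this
    table fills one struct builtin_description (see its definition earlier
    in this file for the authoritative layout).  The fields are, in order,
    the OPTION_MASK_ISA_* bits that must be enabled for the builtin, the
    insn code of the pattern used to expand it, the "__builtin_ia32_*"
    name it is registered under, the IX86_BUILTIN_* enumerator, an rtx
    comparison code (UNKNOWN when the expander needs none), and the
    prototype, an ix86_builtin_func_type cast to int.  For example, the
    first BMI entry just below registers __builtin_ia32_bextr_u32 with
    prototype UINT_FTYPE_UINT_UINT, gated on OPTION_MASK_ISA_BMI and
    expanded through the bmi_bextr_si pattern.  */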
32058 /* BMI */
32059 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32060 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32061 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
32063 /* TBM */
32064 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32065 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32067 /* F16C */
32068 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
32069 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
32070 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
32071 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
32073 /* BMI2 */
32074 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32075 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32076 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32077 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32078 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
32079 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
32081 /* AVX512F */
32082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
32083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
32084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
32085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
32086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
32087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
32088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
32096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
32098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
32105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
32106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
32107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
32108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
32113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
32114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
32115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
32116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
32117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
32118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
32119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
32120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
32137 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
32138 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
32139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
32141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
32175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
32176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
32177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
32178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
32179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
32180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
32181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
32182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
32183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
32184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
32187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
32211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
32212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
32213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
32221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
32222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
32233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
32234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
32235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
32236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
32239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
32240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
32241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
32242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
32243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
32244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32248 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32249 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32250 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
32251 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
32252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
32254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
32266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
32268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
32271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
32273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
32274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
32276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
32279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
32280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
32281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
32282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32283 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
32284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
32285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
32286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
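 /* Note that the floorpd/ceilpd pack-to-sfix entries above reuse the rtx
    comparison slot to carry ROUND_FLOOR / ROUND_CEIL to the expander; the
    _ROUND suffix on the prototype marks builtins handled that way.  */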
32288 /* Mask arithmetic operations */
32289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
32292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
32296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
32298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
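 /* The HImode entries above back the 16-bit __mmask16 operations; the
    AVX-512 intrinsics header wraps them roughly along these lines (a
    sketch for illustration, not a copy of avx512fintrin.h):

      extern __inline __mmask16
      __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
      _mm512_kand (__mmask16 __A, __mmask16 __B)
      {
        return (__mmask16) __builtin_ia32_kandhi (__A, __B);
      }

    so each HI_FTYPE_HI_HI prototype corresponds to a two-operand mask
    instruction such as KANDW.  */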
32300 /* SHA */
32301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32303 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
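 /* The SHA entries use 0 for the name field: the table still supplies the
    insn code and prototype needed for expansion, while the
    "__builtin_ia32_sha*" names are presumably registered by explicit
    def_builtin calls elsewhere in this file so they can be gated on the
    SHA ISA flag rather than the SSE2 mask used here.  */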
32309 /* AVX512VL. */
32310 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32311 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32312 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32313 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32314 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32315 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32316 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32317 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32318 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32319 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32320 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32321 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32322 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32323 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32324 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32325 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32326 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32327 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32328 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32329 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32330 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32331 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32332 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32333 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32348 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32349 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32350 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32351 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32352 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32353 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32354 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32355 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32356 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32357 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32358 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32359 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32360 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
32364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
32365 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
32366 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
32367 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32368 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32369 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32370 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32371 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32372 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32373 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
32374 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
32375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32377 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32378 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32379 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32380 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
32390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
32391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
32394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
32395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
32398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
32399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
32400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
32401 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
32402 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
32403 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32404 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
32405 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
32406 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
32407 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32408 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
32409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
32410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
32411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
32413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
32414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
32415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
32417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
32418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
32420 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
32421 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
32422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
32423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
32424 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32425 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32426 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32427 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32428 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32429 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32430 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32431 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32432 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32433 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32434 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32435 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32436 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
32437 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
32438 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
32439 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
32440 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
32441 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
32442 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
32443 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
32444 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
32445 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
32446 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
32447 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
32448 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32449 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32450 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32451 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32452 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32453 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32454 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32455 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32456 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32457 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32458 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32459 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32460 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32461 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32462 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32463 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32464 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32465 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32466 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32467 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32468 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32469 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32470 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32471 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32472 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32473 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32474 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32475 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32476 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32477 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32478 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32480 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32482 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32483 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32484 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32485 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32486 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32487 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32488 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32489 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32490 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32491 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32492 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32494 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32495 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32496 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32497 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32498 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32499 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32502 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32503 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32504 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32505 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32510 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32511 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32512 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32513 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32514 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
32515 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
32516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32517 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32518 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32519 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32520 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
32521 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
32522 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32523 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32524 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32525 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32526 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
32527 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
32528 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32529 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
32533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
32534 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32535 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32536 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32537 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32538 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
32539 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
32540 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32541 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32542 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32543 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
32545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
32546 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32547 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32548 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32549 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
32556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
32558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
32560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
32562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32566 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32567 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32568 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32569 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32570 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32571 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32572 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32573 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32574 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32575 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32576 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32577 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32578 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32579 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32580 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32581 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32582 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32583 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32584 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32586 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32587 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32590 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32591 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32592 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32595 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32596 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32607 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32608 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
32610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
32614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32619 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32620 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32621 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32622 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32623 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32624 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32625 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32626 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32627 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32628 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32629 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32630 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32631 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32632 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32645 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32646 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32649 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32650 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32653 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32654 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32655 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32656 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32657 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32658 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32659 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32660 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32661 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32662 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32665 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32666 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32667 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32668 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32669 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32670 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32673 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32674 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32675 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32676 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32681 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32682 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32683 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32684 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32685 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32686 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32692 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
32693 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
32694 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32695 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32696 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
32697 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
32698 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
32699 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
32700 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32701 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
32705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
32706 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32707 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32708 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
32709 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
32710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32717 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32718 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32719 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32720 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32721 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32722 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
32723 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
32724 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32725 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32726 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
32727 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
32728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
32736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
32738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32741 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32742 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32743 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32744 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32745 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32746 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32747 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
32748 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32749 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
32750 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32751 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32752 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32753 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32754 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32755 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32756 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32757 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32758 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32759 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
32761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
32762 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
32763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
32764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32765 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32766 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
32772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32784 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32785 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32786 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32787 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32788 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
32789 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
32790 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
32791 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
32796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
32800 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
32801 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
32802 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
32803 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
32804 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
32805 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
32806 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
32807 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
32808 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
32809 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
32810 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
32811 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
32812 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
32813 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
32814 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
32815 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
32816 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
32817 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
32818 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
32819 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
32820 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
32821 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
32822 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32823 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32824 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32825 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32830 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32831 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32832 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32833 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32838 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32839 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32840 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32841 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32846 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32847 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32848 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32849 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32854 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32855 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32856 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32857 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32867 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32868 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32869 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32870 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32871 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32872 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32873 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32874 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32875 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32876 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32881 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32882 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32883 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32884 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32895 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32896 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32897 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32898 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32899 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32900 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32901 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32902 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32903 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32904 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32905 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32906 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32907 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32908 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32909 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32910 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32911 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32912 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32913 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32914 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32915 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32916 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32917 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32925 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32926 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32927 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32928 { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32929 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32930 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32931 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32932 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32946 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32947 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32948 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32949 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32950 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32951 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32952 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32953 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32954 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32955 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32956 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32957 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32958 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32959 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32960 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32961 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32962 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32963 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32964 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32965 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32966 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32967 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32968 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32969 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32970 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32975 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32976 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32977 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32978 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32979 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32980 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32981 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32982 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32983 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32984 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32985 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32986 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32987 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32988 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32989 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32990 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32991 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32992 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32993 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32994 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32995 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32996 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32997 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32998 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32999 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
33000 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
33002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
33003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
33004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
33005 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
33006 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
33007 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
33008 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
33009 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
33010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
33011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
33012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
33013 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
33014 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
33015 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
33016 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
33017 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
33018 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
33019 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
33020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
33021 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
33023 /* AVX512DQ. */
33024 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
33025 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
33026 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
33027 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
33028 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
33029 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
33030 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
33031 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
33032 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
33033 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
33034 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
33035 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
33036 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33037 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
33038 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33039 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
33040 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33041 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
33042 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33043 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI},
33044 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
33045 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
33046 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
33047 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
33048 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
33049 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
33050 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
33051 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
33052 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
33053 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
33054 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
33056 /* AVX512BW. */
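/* For the 512-bit BW builtins below the write-mask operand width
   follows the element count of the vector: V64QI operations take a
   64-bit (DI) mask and V32HI operations a 32-bit (SI) mask, which is
   why their FTYPE codes end in _DI or _SI rather than the _QI/_HI
   masks used by the 8- and 16-element types.  */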
33057 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
33058 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
33059 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
33060 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33061 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
33062 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
33063 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
33064 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
33065 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33066 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33067 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
33068 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
33069 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
33070 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
33071 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
33072 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
33073 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
33074 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
33075 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33076 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33077 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33078 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33079 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33080 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33081 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33082 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33083 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33084 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33085 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33086 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33087 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33088 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33089 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33090 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33091 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33092 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33093 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33094 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33095 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33096 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33097 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33098 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33099 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33100 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33101 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33102 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33103 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
33104 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33105 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33106 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33107 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33108 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33109 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33110 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
33111 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
33112 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33113 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
33114 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
33115 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33116 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33117 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33118 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33119 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33120 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33121 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33122 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33123 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33124 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33125 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
33126 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
33127 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
33128 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
33129 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
33130 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
33131 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33132 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33133 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33134 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33135 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33136 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33137 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
33138 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
33139 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
33140 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33141 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33142 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
33143 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
33144 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33145 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33146 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
33147 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
33149 /* AVX512IFMA */
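/* The IFMA builtins wrap VPMADD52LUQ/VPMADD52HUQ, which multiply the
   low 52 bits of each 64-bit lane and accumulate either the low (LUQ)
   or the high (HUQ) 52 bits of the 104-bit product into the
   destination.  */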
33150 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33151 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33152 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33153 { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
33154 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33155 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33156 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33157 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33158 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33159 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33160 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33161 { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
33163 /* AVX512VBMI */
33164 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33165 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33166 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33167 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33168 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33169 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33170 { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
33171 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33172 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33173 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33174 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33175 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33176 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33177 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
33178 { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33179 };
33181 /* Builtins with rounding support. */
33182 static const struct builtin_description bdesc_round_args[] =
33183 {
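/* Every entry in this table takes an explicit rounding-mode / SAE
   immediate as its last argument, which is why all the FTYPE codes
   below end in _INT.  For illustration only (the user-visible
   intrinsic wrappers live in the avx512*intrin.h headers, not here),
   a direct call such as

     __m512d r = __builtin_ia32_addpd512_mask (a, b, src, m,
                                               _MM_FROUND_TO_NEAREST_INT
                                               | _MM_FROUND_NO_EXC);

   passes that trailing immediate to select the rounding behaviour for
   the operation.  */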
33184 /* AVX512F */
33185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
33190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
33191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
33192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
33193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
33194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
33195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
33198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
33200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
33202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
33204 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
33205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
33206 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
33207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
33208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
33210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
33212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
33213 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
33214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
33215 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
33216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
33222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
33224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
33226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
33228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
33249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
33250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33265 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33267 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33269 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33271 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
33273 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
33274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
33275 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
33276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
33277 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
33278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
33279 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
33280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
33287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33305 /* AVX512ER */
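/* The ER builtins wrap the exponential/reciprocal approximation
   instructions; the "28" in rcp28/rsqrt28 refers to their guaranteed
   relative accuracy of 2^-28.  Like the other entries in this table
   they take a trailing rounding/SAE immediate.  */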
33306 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33307 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33308 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33309 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33310 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33311 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33312 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33313 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33314 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33315 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33317 /* AVX512DQ. */
33318 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33319 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33320 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33321 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33322 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33323 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33324 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33325 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33326 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33327 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33328 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33329 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33330 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33331 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33332 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33333 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33334 };
33336 /* Builtins for MPX. */
33337 static const struct builtin_description bdesc_mpx[] =
33338 {
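/* These entries carry an insn code of 0 because the MPX builtins do
   not map onto a single named insn pattern; they are expanded by
   dedicated cases in ix86_expand_builtin rather than by the generic
   table-driven expanders.  */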
33339 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33340 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33341 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33342 };
33344 /* Const builtins for MPX. */
33345 static const struct builtin_description bdesc_mpx_const[] =
33346 {
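/* Same layout as bdesc_mpx above; these builtins have no side
   effects, so they are registered as const builtins and may be
   CSEd by the middle end.  */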
33347 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33348 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33349 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33350 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33351 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33352 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33353 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33354 { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33355 };
33357 /* FMA4 and XOP. */
33358 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33359 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33360 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33361 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33362 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
33363 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
33364 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
33365 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
33366 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
33367 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
33368 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
33369 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
33370 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
33371 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
33372 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
33373 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
33374 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
33375 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
33376 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
33377 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
33378 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
33379 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
33380 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
33381 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
33382 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
33383 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
33384 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
33385 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
33386 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
33387 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
33388 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
33389 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
33390 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
33391 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
33392 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
33393 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
33394 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
33395 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
33396 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
33397 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
33398 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
33399 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
33400 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
33401 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
33402 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
33403 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
33404 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
33405 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
33406 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
33407 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
33408 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
33409 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
33411 static const struct builtin_description bdesc_multi_arg[] =
33413 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33414 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33415 UNKNOWN, (int)MULTI_ARG_3_SF },
33416 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33417 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33418 UNKNOWN, (int)MULTI_ARG_3_DF },
33420 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33421 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33422 UNKNOWN, (int)MULTI_ARG_3_SF },
33423 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33424 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33425 UNKNOWN, (int)MULTI_ARG_3_DF },
33427 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33428 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33429 UNKNOWN, (int)MULTI_ARG_3_SF },
33430 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33431 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33432 UNKNOWN, (int)MULTI_ARG_3_DF },
33433 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33434 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33435 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33436 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33437 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33438 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33440 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33441 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33442 UNKNOWN, (int)MULTI_ARG_3_SF },
33443 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33444 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33445 UNKNOWN, (int)MULTI_ARG_3_DF },
33446 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33447 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33448 UNKNOWN, (int)MULTI_ARG_3_SF2 },
33449 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33450 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33451 UNKNOWN, (int)MULTI_ARG_3_DF2 },
33453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
33454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
33455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
33456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
33457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
33458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
33459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
33461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
33463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
33464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
33465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
33466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
33467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
33469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
33471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
33473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
33477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
33481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
33484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
33486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
33487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
33488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
33489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
33490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
33491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
33492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
33494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
33495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
33496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
33497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
33498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
33499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
33501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
33502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
33503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
33504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
33505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
33506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
33508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
33516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
33517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
33519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
33521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
33522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
33524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
33525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
33527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
33528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
33529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
33530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
33532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
33533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
33535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
33536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
33537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
33538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
33540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
33541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
33543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
33544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
33545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
33546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
33548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
33551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
33552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
33553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
33554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
33556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
33557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
33559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
33560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
33561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
33562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
33564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
33565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
33567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
33568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
33569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
33570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
33572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
33573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
33575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
33576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
33577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
33578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
33580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
33581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
33583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
33584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
33585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
33586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
33588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
33593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
33594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
33595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
33597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
33602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
33603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
33604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
33606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33613 /* TM vector builtins. */
33615 /* Reuse the existing x86-specific `struct builtin_description' because
33616 we're lazy. Add casts to make them fit. */
33617 static const struct builtin_description bdesc_tm[] =
33619 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33620 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33621 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33622 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33623 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33624 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33625 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33627 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33628 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33629 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33630 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33631 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33632 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33633 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33635 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33636 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33637 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33638 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33639 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33640 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33641 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33643 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33644 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33645 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
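/* The MMX entries above use the 64-bit V2SI type, the SSE entries the
   128-bit V4SF type, and the AVX entries the 256-bit V8SF type, matching
   the M64/M128/M256 suffixes used by the TM load/store helpers below.  */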
33648 /* TM callbacks. */
33650 /* Return the builtin decl needed to load a vector of TYPE. */
33652 static tree
33653 ix86_builtin_tm_load (tree type)
33655 if (TREE_CODE (type) == VECTOR_TYPE)
33657 switch (tree_to_uhwi (TYPE_SIZE (type)))
33659 case 64:
33660 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33661 case 128:
33662 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33663 case 256:
33664 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33667 return NULL_TREE;
33670 /* Return the builtin decl needed to store a vector of TYPE. */
33672 static tree
33673 ix86_builtin_tm_store (tree type)
33675 if (TREE_CODE (type) == VECTOR_TYPE)
33677 switch (tree_to_uhwi (TYPE_SIZE (type)))
33679 case 64:
33680 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33681 case 128:
33682 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33683 case 256:
33684 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33687 return NULL_TREE;
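/* So, for example, a 128-bit vector type resolves to the decl recorded
   for BUILT_IN_TM_LOAD_M128 or BUILT_IN_TM_STORE_M128, i.e. the
   __builtin__ITM_RM128 / __builtin__ITM_WM128 entries from bdesc_tm.  */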
33690 /* Initialize the transactional memory vector load/store builtins. */
33692 static void
33693 ix86_init_tm_builtins (void)
33695 enum ix86_builtin_func_type ftype;
33696 const struct builtin_description *d;
33697 size_t i;
33698 tree decl;
33699 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33700 tree attrs_log, attrs_type_log;
33702 if (!flag_tm)
33703 return;
33705 /* If there are no builtins defined, we must be compiling in a
33706 language without trans-mem support. */
33707 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33708 return;
33710 /* Use whatever attributes a normal TM load has. */
33711 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33712 attrs_load = DECL_ATTRIBUTES (decl);
33713 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33714 /* Use whatever attributes a normal TM store has. */
33715 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33716 attrs_store = DECL_ATTRIBUTES (decl);
33717 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33718 /* Use whatever attributes a normal TM log has. */
33719 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33720 attrs_log = DECL_ATTRIBUTES (decl);
33721 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33723 for (i = 0, d = bdesc_tm;
33724 i < ARRAY_SIZE (bdesc_tm);
33725 i++, d++)
33727 if ((d->mask & ix86_isa_flags) != 0
33728 || (lang_hooks.builtin_function
33729 == lang_hooks.builtin_function_ext_scope))
33731 tree type, attrs, attrs_type;
33732 enum built_in_function code = (enum built_in_function) d->code;
33734 ftype = (enum ix86_builtin_func_type) d->flag;
33735 type = ix86_get_builtin_func_type (ftype);
33737 if (BUILTIN_TM_LOAD_P (code))
33739 attrs = attrs_load;
33740 attrs_type = attrs_type_load;
33742 else if (BUILTIN_TM_STORE_P (code))
33744 attrs = attrs_store;
33745 attrs_type = attrs_type_store;
33747 else
33749 attrs = attrs_log;
33750 attrs_type = attrs_type_log;
33752 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33753 /* The builtin without the prefix for
33754 calling it directly. */
33755 d->name + strlen ("__builtin_"),
33756 attrs);
33757 /* add_builtin_function () will set the DECL_ATTRIBUTES; now
33758 set the TYPE_ATTRIBUTES. */
33759 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33761 set_builtin_decl (code, decl, false);
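/* set_builtin_decl records DECL so that builtin_decl_explicit, as used
   by ix86_builtin_tm_load and ix86_builtin_tm_store above, can find it.  */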
33766 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
33767 not in the current target ISA, so that the user can compile particular
33768 modules with target-specific options that differ from the command-line
33769 options. */
33770 static void
33771 ix86_init_mmx_sse_builtins (void)
33773 const struct builtin_description * d;
33774 enum ix86_builtin_func_type ftype;
33775 size_t i;
33777 /* Add all special builtins with a variable number of operands. */
33778 for (i = 0, d = bdesc_special_args;
33779 i < ARRAY_SIZE (bdesc_special_args);
33780 i++, d++)
33782 if (d->name == 0)
33783 continue;
33785 ftype = (enum ix86_builtin_func_type) d->flag;
33786 def_builtin (d->mask, d->name, ftype, d->code);
33789 /* Add all builtins with a variable number of operands. */
33790 for (i = 0, d = bdesc_args;
33791 i < ARRAY_SIZE (bdesc_args);
33792 i++, d++)
33794 if (d->name == 0)
33795 continue;
33797 ftype = (enum ix86_builtin_func_type) d->flag;
33798 def_builtin_const (d->mask, d->name, ftype, d->code);
33801 /* Add all builtins with rounding. */
33802 for (i = 0, d = bdesc_round_args;
33803 i < ARRAY_SIZE (bdesc_round_args);
33804 i++, d++)
33806 if (d->name == 0)
33807 continue;
33809 ftype = (enum ix86_builtin_func_type) d->flag;
33810 def_builtin_const (d->mask, d->name, ftype, d->code);
33813 /* pcmpestr[im] insns. */
33814 for (i = 0, d = bdesc_pcmpestr;
33815 i < ARRAY_SIZE (bdesc_pcmpestr);
33816 i++, d++)
33818 if (d->code == IX86_BUILTIN_PCMPESTRM128)
33819 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33820 else
33821 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33822 def_builtin_const (d->mask, d->name, ftype, d->code);
33825 /* pcmpistr[im] insns. */
33826 for (i = 0, d = bdesc_pcmpistr;
33827 i < ARRAY_SIZE (bdesc_pcmpistr);
33828 i++, d++)
33830 if (d->code == IX86_BUILTIN_PCMPISTRM128)
33831 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33832 else
33833 ftype = INT_FTYPE_V16QI_V16QI_INT;
33834 def_builtin_const (d->mask, d->name, ftype, d->code);
33837 /* comi/ucomi insns. */
33838 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33840 if (d->mask == OPTION_MASK_ISA_SSE2)
33841 ftype = INT_FTYPE_V2DF_V2DF;
33842 else
33843 ftype = INT_FTYPE_V4SF_V4SF;
33844 def_builtin_const (d->mask, d->name, ftype, d->code);
33847 /* SSE */
33848 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33849 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33850 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33851 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33853 /* SSE or 3DNow!A */
33854 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33855 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33856 IX86_BUILTIN_MASKMOVQ);
33858 /* SSE2 */
33859 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33860 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33862 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33863 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33864 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33865 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33867 /* SSE3. */
33868 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33869 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33870 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33871 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33873 /* AES */
33874 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33875 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33876 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33877 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33878 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33879 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33880 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33881 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33882 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33883 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33884 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33885 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33887 /* PCLMUL */
33888 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33889 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33891 /* RDRND */
33892 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33893 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33894 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33895 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33896 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33897 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33898 IX86_BUILTIN_RDRAND64_STEP);
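/* Per INT_FTYPE_PUNSIGNED above, e.g. __builtin_ia32_rdrand32_step takes
   a pointer to the destination and returns nonzero when a random value
   was stored through it; illustrative usage:
   if (__builtin_ia32_rdrand32_step (&r)) ... use r ...  */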
33900 /* AVX2 */
33901 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33902 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33903 IX86_BUILTIN_GATHERSIV2DF);
33905 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33906 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33907 IX86_BUILTIN_GATHERSIV4DF);
33909 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33910 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33911 IX86_BUILTIN_GATHERDIV2DF);
33913 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33914 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33915 IX86_BUILTIN_GATHERDIV4DF);
33917 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33918 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33919 IX86_BUILTIN_GATHERSIV4SF);
33921 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33922 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33923 IX86_BUILTIN_GATHERSIV8SF);
33925 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33926 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33927 IX86_BUILTIN_GATHERDIV4SF);
33929 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33930 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33931 IX86_BUILTIN_GATHERDIV8SF);
33933 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33934 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33935 IX86_BUILTIN_GATHERSIV2DI);
33937 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33938 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33939 IX86_BUILTIN_GATHERSIV4DI);
33941 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33942 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33943 IX86_BUILTIN_GATHERDIV2DI);
33945 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33946 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33947 IX86_BUILTIN_GATHERDIV4DI);
33949 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33950 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33951 IX86_BUILTIN_GATHERSIV4SI);
33953 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33954 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33955 IX86_BUILTIN_GATHERSIV8SI);
33957 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33958 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33959 IX86_BUILTIN_GATHERDIV4SI);
33961 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33962 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33963 IX86_BUILTIN_GATHERDIV8SI);
33965 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
33966 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33967 IX86_BUILTIN_GATHERALTSIV4DF);
33969 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
33970 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33971 IX86_BUILTIN_GATHERALTDIV8SF);
33973 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
33974 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33975 IX86_BUILTIN_GATHERALTSIV4DI);
33977 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
33978 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33979 IX86_BUILTIN_GATHERALTDIV8SI);
33981 /* AVX512F */
33982 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33983 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33984 IX86_BUILTIN_GATHER3SIV16SF);
33986 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33987 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33988 IX86_BUILTIN_GATHER3SIV8DF);
33990 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33991 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33992 IX86_BUILTIN_GATHER3DIV16SF);
33994 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33995 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33996 IX86_BUILTIN_GATHER3DIV8DF);
33998 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33999 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
34000 IX86_BUILTIN_GATHER3SIV16SI);
34002 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
34003 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
34004 IX86_BUILTIN_GATHER3SIV8DI);
34006 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
34007 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
34008 IX86_BUILTIN_GATHER3DIV16SI);
34010 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
34011 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
34012 IX86_BUILTIN_GATHER3DIV8DI);
34014 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
34015 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
34016 IX86_BUILTIN_GATHER3ALTSIV8DF);
34018 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
34019 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
34020 IX86_BUILTIN_GATHER3ALTDIV16SF);
34022 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
34023 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
34024 IX86_BUILTIN_GATHER3ALTSIV8DI);
34026 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
34027 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
34028 IX86_BUILTIN_GATHER3ALTDIV16SI);
34030 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
34031 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
34032 IX86_BUILTIN_SCATTERSIV16SF);
34034 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
34035 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
34036 IX86_BUILTIN_SCATTERSIV8DF);
34038 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
34039 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
34040 IX86_BUILTIN_SCATTERDIV16SF);
34042 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
34043 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
34044 IX86_BUILTIN_SCATTERDIV8DF);
34046 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
34047 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
34048 IX86_BUILTIN_SCATTERSIV16SI);
34050 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
34051 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
34052 IX86_BUILTIN_SCATTERSIV8DI);
34054 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
34055 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
34056 IX86_BUILTIN_SCATTERDIV16SI);
34058 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
34059 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
34060 IX86_BUILTIN_SCATTERDIV8DI);
34062 /* AVX512VL */
34063 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
34064 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
34065 IX86_BUILTIN_GATHER3SIV2DF);
34067 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
34068 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
34069 IX86_BUILTIN_GATHER3SIV4DF);
34071 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
34072 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
34073 IX86_BUILTIN_GATHER3DIV2DF);
34075 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
34076 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
34077 IX86_BUILTIN_GATHER3DIV4DF);
34079 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
34080 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
34081 IX86_BUILTIN_GATHER3SIV4SF);
34083 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
34084 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
34085 IX86_BUILTIN_GATHER3SIV8SF);
34087 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
34088 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
34089 IX86_BUILTIN_GATHER3DIV4SF);
34091 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
34092 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
34093 IX86_BUILTIN_GATHER3DIV8SF);
34095 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
34096 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
34097 IX86_BUILTIN_GATHER3SIV2DI);
34099 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
34100 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
34101 IX86_BUILTIN_GATHER3SIV4DI);
34103 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
34104 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
34105 IX86_BUILTIN_GATHER3DIV2DI);
34107 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
34108 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
34109 IX86_BUILTIN_GATHER3DIV4DI);
34111 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
34112 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
34113 IX86_BUILTIN_GATHER3SIV4SI);
34115 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
34116 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
34117 IX86_BUILTIN_GATHER3SIV8SI);
34119 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
34120 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
34121 IX86_BUILTIN_GATHER3DIV4SI);
34123 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
34124 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
34125 IX86_BUILTIN_GATHER3DIV8SI);
34127 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
34128 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
34129 IX86_BUILTIN_GATHER3ALTSIV4DF);
34131 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
34132 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
34133 IX86_BUILTIN_GATHER3ALTDIV8SF);
34135 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
34136 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
34137 IX86_BUILTIN_GATHER3ALTSIV4DI);
34139 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
34140 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
34141 IX86_BUILTIN_GATHER3ALTDIV8SI);
34143 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
34144 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
34145 IX86_BUILTIN_SCATTERSIV8SF);
34147 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
34148 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
34149 IX86_BUILTIN_SCATTERSIV4SF);
34151 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
34152 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
34153 IX86_BUILTIN_SCATTERSIV4DF);
34155 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
34156 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
34157 IX86_BUILTIN_SCATTERSIV2DF);
34159 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34160 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34161 IX86_BUILTIN_SCATTERDIV8SF);
34163 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34164 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34165 IX86_BUILTIN_SCATTERDIV4SF);
34167 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34168 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34169 IX86_BUILTIN_SCATTERDIV4DF);
34171 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34172 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34173 IX86_BUILTIN_SCATTERDIV2DF);
34175 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34176 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34177 IX86_BUILTIN_SCATTERSIV8SI);
34179 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34180 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34181 IX86_BUILTIN_SCATTERSIV4SI);
34183 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34184 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34185 IX86_BUILTIN_SCATTERSIV4DI);
34187 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34188 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34189 IX86_BUILTIN_SCATTERSIV2DI);
34191 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34192 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34193 IX86_BUILTIN_SCATTERDIV8SI);
34195 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34196 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34197 IX86_BUILTIN_SCATTERDIV4SI);
34199 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34200 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34201 IX86_BUILTIN_SCATTERDIV4DI);
34203 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34204 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34205 IX86_BUILTIN_SCATTERDIV2DI);
34207 /* AVX512PF */
34208 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34209 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34210 IX86_BUILTIN_GATHERPFDPD);
34211 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34212 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34213 IX86_BUILTIN_GATHERPFDPS);
34214 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34215 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34216 IX86_BUILTIN_GATHERPFQPD);
34217 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34218 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34219 IX86_BUILTIN_GATHERPFQPS);
34220 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34221 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34222 IX86_BUILTIN_SCATTERPFDPD);
34223 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34224 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34225 IX86_BUILTIN_SCATTERPFDPS);
34226 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34227 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34228 IX86_BUILTIN_SCATTERPFQPD);
34229 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34230 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34231 IX86_BUILTIN_SCATTERPFQPS);
34233 /* SHA */
34234 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34235 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34236 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34237 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34238 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34239 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34240 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34241 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34242 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34243 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34244 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34245 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34246 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34247 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34249 /* RTM. */
34250 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34251 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34253 /* MMX access to the vec_init patterns. */
34254 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34255 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34257 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34258 V4HI_FTYPE_HI_HI_HI_HI,
34259 IX86_BUILTIN_VEC_INIT_V4HI);
34261 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34262 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34263 IX86_BUILTIN_VEC_INIT_V8QI);
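/* Per the V2SI_FTYPE_INT_INT signature above, a call such as
   __builtin_ia32_vec_init_v2si (a, b) builds a V2SI vector from two ints;
   the V4HI and V8QI variants follow the same element-wise pattern.  */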
34265 /* Access to the vec_extract patterns. */
34266 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34267 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34268 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34269 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34270 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34271 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34272 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34273 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34274 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34275 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34277 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34278 "__builtin_ia32_vec_ext_v4hi",
34279 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34281 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34282 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34284 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34285 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34287 /* Access to the vec_set patterns. */
34288 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34289 "__builtin_ia32_vec_set_v2di",
34290 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34292 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34293 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34295 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34296 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34298 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34299 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34301 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34302 "__builtin_ia32_vec_set_v4hi",
34303 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34305 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34306 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34308 /* RDSEED */
34309 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34310 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34311 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34312 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34313 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34314 "__builtin_ia32_rdseed_di_step",
34315 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34317 /* ADCX */
34318 def_builtin (0, "__builtin_ia32_addcarryx_u32",
34319 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34320 def_builtin (OPTION_MASK_ISA_64BIT,
34321 "__builtin_ia32_addcarryx_u64",
34322 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34323 IX86_BUILTIN_ADDCARRYX64);
34325 /* SBB */
34326 def_builtin (0, "__builtin_ia32_sbb_u32",
34327 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34328 def_builtin (OPTION_MASK_ISA_64BIT,
34329 "__builtin_ia32_sbb_u64",
34330 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34331 IX86_BUILTIN_SBB64);
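/* Per UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED above, e.g.
   __builtin_ia32_addcarryx_u32 (carry_in, a, b, &sum) stores the low part
   of the sum through the pointer and returns the carry-out (argument
   names here are illustrative).  */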
34333 /* Read/write FLAGS. */
34334 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34335 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34336 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34337 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34338 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34339 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34340 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34341 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34343 /* CLFLUSHOPT. */
34344 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34345 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34347 /* CLWB. */
34348 def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34349 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34351 /* MONITORX and MWAITX. */
34352 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
34353 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
34354 def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
34355 VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
34357 /* Add FMA4 multi-arg instructions. */
34358 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34360 if (d->name == 0)
34361 continue;
34363 ftype = (enum ix86_builtin_func_type) d->flag;
34364 def_builtin_const (d->mask, d->name, ftype, d->code);
34368 static void
34369 ix86_init_mpx_builtins ()
34371 const struct builtin_description * d;
34372 enum ix86_builtin_func_type ftype;
34373 tree decl;
34374 size_t i;
34376 for (i = 0, d = bdesc_mpx;
34377 i < ARRAY_SIZE (bdesc_mpx);
34378 i++, d++)
34380 if (d->name == 0)
34381 continue;
34383 ftype = (enum ix86_builtin_func_type) d->flag;
34384 decl = def_builtin (d->mask, d->name, ftype, d->code);
34386 /* Without the leaf and nothrow flags, abnormal edges
34387 may follow calls to MPX builtins when setjmp is
34388 present in the function. Since we may have many
34389 MPX builtin calls, this causes lots of useless
34390 edges and enormous PHI nodes. To avoid this we mark
34391 MPX builtins as leaf and nothrow. */
34392 if (decl)
34394 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34395 NULL_TREE);
34396 TREE_NOTHROW (decl) = 1;
34398 else
34400 ix86_builtins_isa[(int)d->code].leaf_p = true;
34401 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34405 for (i = 0, d = bdesc_mpx_const;
34406 i < ARRAY_SIZE (bdesc_mpx_const);
34407 i++, d++)
34409 if (d->name == 0)
34410 continue;
34412 ftype = (enum ix86_builtin_func_type) d->flag;
34413 decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34415 if (decl)
34417 DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34418 NULL_TREE);
34419 TREE_NOTHROW (decl) = 1;
34421 else
34423 ix86_builtins_isa[(int)d->code].leaf_p = true;
34424 ix86_builtins_isa[(int)d->code].nothrow_p = true;
34429 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34430 to return a pointer to VERSION_DECL if the outcome of the expression
34431 formed by PREDICATE_CHAIN is true. This function will be called during
34432 version dispatch to decide which function version to execute. It returns
34433 the basic block at the end, to which more conditions can be added. */
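/* For illustration only (a rough sketch, not the exact trees built by this
   code): for a single predicate the statements appended to NEW_BB look
   roughly like

     cond_var = predicate_decl (predicate_arg);
     if (cond_var > 0)
       return (void *) &version_decl;
     // control falls through to the block returned for further conditions

   When the chain has several predicates, their results are combined with
   MIN_EXPR before the comparison, as the body below shows.  */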
34435 static basic_block
34436 add_condition_to_bb (tree function_decl, tree version_decl,
34437 tree predicate_chain, basic_block new_bb)
34439 gimple return_stmt;
34440 tree convert_expr, result_var;
34441 gimple convert_stmt;
34442 gimple call_cond_stmt;
34443 gimple if_else_stmt;
34445 basic_block bb1, bb2, bb3;
34446 edge e12, e23;
34448 tree cond_var, and_expr_var = NULL_TREE;
34449 gimple_seq gseq;
34451 tree predicate_decl, predicate_arg;
34453 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34455 gcc_assert (new_bb != NULL);
34456 gseq = bb_seq (new_bb);
34459 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34460 build_fold_addr_expr (version_decl));
34461 result_var = create_tmp_var (ptr_type_node);
34462 convert_stmt = gimple_build_assign (result_var, convert_expr);
34463 return_stmt = gimple_build_return (result_var);
34465 if (predicate_chain == NULL_TREE)
34467 gimple_seq_add_stmt (&gseq, convert_stmt);
34468 gimple_seq_add_stmt (&gseq, return_stmt);
34469 set_bb_seq (new_bb, gseq);
34470 gimple_set_bb (convert_stmt, new_bb);
34471 gimple_set_bb (return_stmt, new_bb);
34472 pop_cfun ();
34473 return new_bb;
34476 while (predicate_chain != NULL)
34478 cond_var = create_tmp_var (integer_type_node);
34479 predicate_decl = TREE_PURPOSE (predicate_chain);
34480 predicate_arg = TREE_VALUE (predicate_chain);
34481 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34482 gimple_call_set_lhs (call_cond_stmt, cond_var);
34484 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34485 gimple_set_bb (call_cond_stmt, new_bb);
34486 gimple_seq_add_stmt (&gseq, call_cond_stmt);
34488 predicate_chain = TREE_CHAIN (predicate_chain);
34490 if (and_expr_var == NULL)
34491 and_expr_var = cond_var;
34492 else
34494 gimple assign_stmt;
34495 /* Use MIN_EXPR to check whether any of the integers is zero:
34496 and_expr_var = min_expr <cond_var, and_expr_var>. */
34497 assign_stmt = gimple_build_assign (and_expr_var,
34498 build2 (MIN_EXPR, integer_type_node,
34499 cond_var, and_expr_var));
34501 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34502 gimple_set_bb (assign_stmt, new_bb);
34503 gimple_seq_add_stmt (&gseq, assign_stmt);
34507 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34508 integer_zero_node,
34509 NULL_TREE, NULL_TREE);
34510 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34511 gimple_set_bb (if_else_stmt, new_bb);
34512 gimple_seq_add_stmt (&gseq, if_else_stmt);
34514 gimple_seq_add_stmt (&gseq, convert_stmt);
34515 gimple_seq_add_stmt (&gseq, return_stmt);
34516 set_bb_seq (new_bb, gseq);
34518 bb1 = new_bb;
34519 e12 = split_block (bb1, if_else_stmt);
34520 bb2 = e12->dest;
34521 e12->flags &= ~EDGE_FALLTHRU;
34522 e12->flags |= EDGE_TRUE_VALUE;
34524 e23 = split_block (bb2, return_stmt);
34526 gimple_set_bb (convert_stmt, bb2);
34527 gimple_set_bb (return_stmt, bb2);
34529 bb3 = e23->dest;
34530 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34532 remove_edge (e23);
34533 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34535 pop_cfun ();
34537 return bb3;
34540 /* This parses the attribute arguments to target in DECL and determines
34541 the right builtin to use to match the platform specification.
34542 It returns the priority value for this version decl. If PREDICATE_LIST
34543 is not NULL, it stores the list of cpu features that need to be checked
34544 before dispatching this function. */
34546 static unsigned int
34547 get_builtin_code_for_version (tree decl, tree *predicate_list)
34549 tree attrs;
34550 struct cl_target_option cur_target;
34551 tree target_node;
34552 struct cl_target_option *new_target;
34553 const char *arg_str = NULL;
34554 const char *attrs_str = NULL;
34555 char *tok_str = NULL;
34556 char *token;
34558 /* Priority of i386 features, greater value is higher priority. This is
34559 used to decide the order in which function dispatch must happen. For
34560 instance, a version specialized for SSE4.2 should be checked for dispatch
34561 before a version for SSE3, as SSE4.2 implies SSE3. */
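/* For example (illustrative only): with versions tagged target("sse4.2"),
   target("sse3") and target("default"), the dispatcher built from these
   priorities tests the sse4.2 version first, then the sse3 version, and
   falls back to the default last.  */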
34562 enum feature_priority
34564 P_ZERO = 0,
34565 P_MMX,
34566 P_SSE,
34567 P_SSE2,
34568 P_SSE3,
34569 P_SSSE3,
34570 P_PROC_SSSE3,
34571 P_SSE4_A,
34572 P_PROC_SSE4_A,
34573 P_SSE4_1,
34574 P_SSE4_2,
34575 P_PROC_SSE4_2,
34576 P_POPCNT,
34577 P_AES,
34578 P_AVX,
34579 P_PROC_AVX,
34580 P_BMI,
34581 P_PROC_BMI,
34582 P_FMA4,
34583 P_XOP,
34584 P_PROC_XOP,
34585 P_FMA,
34586 P_PROC_FMA,
34587 P_BMI2,
34588 P_AVX2,
34589 P_PROC_AVX2,
34590 P_AVX512F,
34591 P_PROC_AVX512F
34594 enum feature_priority priority = P_ZERO;
34596 /* These are the target attribute strings for which a dispatcher is
34597 available, from fold_builtin_cpu. */
34599 static struct _feature_list
34601 const char *const name;
34602 const enum feature_priority priority;
34604 const feature_list[] =
34606 {"mmx", P_MMX},
34607 {"sse", P_SSE},
34608 {"sse2", P_SSE2},
34609 {"sse3", P_SSE3},
34610 {"sse4a", P_SSE4_A},
34611 {"ssse3", P_SSSE3},
34612 {"sse4.1", P_SSE4_1},
34613 {"sse4.2", P_SSE4_2},
34614 {"popcnt", P_POPCNT},
34615 {"aes", P_AES},
34616 {"avx", P_AVX},
34617 {"bmi", P_BMI},
34618 {"fma4", P_FMA4},
34619 {"xop", P_XOP},
34620 {"fma", P_FMA},
34621 {"bmi2", P_BMI2},
34622 {"avx2", P_AVX2},
34623 {"avx512f", P_AVX512F}
34627 static unsigned int NUM_FEATURES
34628 = sizeof (feature_list) / sizeof (struct _feature_list);
34630 unsigned int i;
34632 tree predicate_chain = NULL_TREE;
34633 tree predicate_decl, predicate_arg;
34635 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34636 gcc_assert (attrs != NULL);
34638 attrs = TREE_VALUE (TREE_VALUE (attrs));
34640 gcc_assert (TREE_CODE (attrs) == STRING_CST);
34641 attrs_str = TREE_STRING_POINTER (attrs);
34643 /* Return priority zero for default function. */
34644 if (strcmp (attrs_str, "default") == 0)
34645 return 0;
34647 /* Handle arch= if specified. For priority, set it to be 1 more than
34648 the best instruction set the processor can handle. For instance, if
34649 there is a version for atom and a version for ssse3 (the highest ISA
34650 priority for atom), the atom version must be checked for dispatch
34651 before the ssse3 version. */
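/* Illustration (not exhaustive): target("arch=bonnell") maps to priority
   P_PROC_SSSE3 below, which sits just above P_SSSE3, so a bonnell-specific
   version is tried before a plain ssse3 version.  */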
34652 if (strstr (attrs_str, "arch=") != NULL)
34654 cl_target_option_save (&cur_target, &global_options);
34655 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34656 &global_options_set);
34658 gcc_assert (target_node);
34659 new_target = TREE_TARGET_OPTION (target_node);
34660 gcc_assert (new_target);
34662 if (new_target->arch_specified && new_target->arch > 0)
34664 switch (new_target->arch)
34666 case PROCESSOR_CORE2:
34667 arg_str = "core2";
34668 priority = P_PROC_SSSE3;
34669 break;
34670 case PROCESSOR_NEHALEM:
34671 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34672 arg_str = "westmere";
34673 else
34674 /* We translate "arch=corei7" and "arch=nehalem" to
34675 "corei7" so that it will be mapped to M_INTEL_COREI7
34676 as cpu type to cover all M_INTEL_COREI7_XXXs. */
34677 arg_str = "corei7";
34678 priority = P_PROC_SSE4_2;
34679 break;
34680 case PROCESSOR_SANDYBRIDGE:
34681 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34682 arg_str = "ivybridge";
34683 else
34684 arg_str = "sandybridge";
34685 priority = P_PROC_AVX;
34686 break;
34687 case PROCESSOR_HASWELL:
34688 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34689 arg_str = "broadwell";
34690 else
34691 arg_str = "haswell";
34692 priority = P_PROC_AVX2;
34693 break;
34694 case PROCESSOR_BONNELL:
34695 arg_str = "bonnell";
34696 priority = P_PROC_SSSE3;
34697 break;
34698 case PROCESSOR_KNL:
34699 arg_str = "knl";
34700 priority = P_PROC_AVX512F;
34701 break;
34702 case PROCESSOR_SILVERMONT:
34703 arg_str = "silvermont";
34704 priority = P_PROC_SSE4_2;
34705 break;
34706 case PROCESSOR_AMDFAM10:
34707 arg_str = "amdfam10h";
34708 priority = P_PROC_SSE4_A;
34709 break;
34710 case PROCESSOR_BTVER1:
34711 arg_str = "btver1";
34712 priority = P_PROC_SSE4_A;
34713 break;
34714 case PROCESSOR_BTVER2:
34715 arg_str = "btver2";
34716 priority = P_PROC_BMI;
34717 break;
34718 case PROCESSOR_BDVER1:
34719 arg_str = "bdver1";
34720 priority = P_PROC_XOP;
34721 break;
34722 case PROCESSOR_BDVER2:
34723 arg_str = "bdver2";
34724 priority = P_PROC_FMA;
34725 break;
34726 case PROCESSOR_BDVER3:
34727 arg_str = "bdver3";
34728 priority = P_PROC_FMA;
34729 break;
34730 case PROCESSOR_BDVER4:
34731 arg_str = "bdver4";
34732 priority = P_PROC_AVX2;
34733 break;
34737 cl_target_option_restore (&global_options, &cur_target);
34739 if (predicate_list && arg_str == NULL)
34741 error_at (DECL_SOURCE_LOCATION (decl),
34742 "No dispatcher found for the versioning attributes");
34743 return 0;
34746 if (predicate_list)
34748 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34749 /* For a C string literal the length includes the trailing NULL. */
34750 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34751 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34752 predicate_chain);
34756 /* Process feature name. */
34757 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34758 strcpy (tok_str, attrs_str);
34759 token = strtok (tok_str, ",");
34760 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34762 while (token != NULL)
34764 /* Do not process "arch=" */
34765 if (strncmp (token, "arch=", 5) == 0)
34767 token = strtok (NULL, ",");
34768 continue;
34770 for (i = 0; i < NUM_FEATURES; ++i)
34772 if (strcmp (token, feature_list[i].name) == 0)
34774 if (predicate_list)
34776 predicate_arg = build_string_literal (
34777 strlen (feature_list[i].name) + 1,
34778 feature_list[i].name);
34779 predicate_chain = tree_cons (predicate_decl, predicate_arg,
34780 predicate_chain);
34782 /* Find the maximum priority feature. */
34783 if (feature_list[i].priority > priority)
34784 priority = feature_list[i].priority;
34786 break;
34789 if (predicate_list && i == NUM_FEATURES)
34791 error_at (DECL_SOURCE_LOCATION (decl),
34792 "No dispatcher found for %s", token);
34793 return 0;
34795 token = strtok (NULL, ",");
34797 free (tok_str);
34799 if (predicate_list && predicate_chain == NULL_TREE)
34801 error_at (DECL_SOURCE_LOCATION (decl),
34802 "No dispatcher found for the versioning attributes : %s",
34803 attrs_str);
34804 return 0;
34806 else if (predicate_list)
34808 predicate_chain = nreverse (predicate_chain);
34809 *predicate_list = predicate_chain;
34812 return priority;
34815 /* This compares the priority of target features in function DECL1
34816 and DECL2. It returns a positive value if DECL1 has higher priority,
34817 a negative value if DECL2 has higher priority and 0 if they are the
34818 same. */
34820 static int
34821 ix86_compare_version_priority (tree decl1, tree decl2)
34823 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34824 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34826 return (int)priority1 - (int)priority2;
34829 /* V1 and V2 point to function versions with different priorities
34830 based on the target ISA. This function compares their priorities. */
34832 static int
34833 feature_compare (const void *v1, const void *v2)
34835 typedef struct _function_version_info
34837 tree version_decl;
34838 tree predicate_chain;
34839 unsigned int dispatch_priority;
34840 } function_version_info;
34842 const function_version_info c1 = *(const function_version_info *)v1;
34843 const function_version_info c2 = *(const function_version_info *)v2;
34844 return (c2.dispatch_priority - c1.dispatch_priority);
34847 /* This function generates the dispatch function for
34848 multi-versioned functions. DISPATCH_DECL is the function which will
34849 contain the dispatch logic. FNDECLS holds the function choices for
34850 dispatch and is passed as a vector. EMPTY_BB is the basic block pointer
34851 in DISPATCH_DECL in which the dispatch code is generated. */
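/* For illustration only (user-level code, not part of this file): given

     __attribute__ ((target ("default"))) int foo (void) { return 0; }
     __attribute__ ((target ("sse4.2"))) int foo (void) { return 1; }
     __attribute__ ((target ("arch=core2"))) int foo (void) { return 2; }

   this routine fills DISPATCH_DECL with a check for each non-default
   version, in decreasing dispatch priority, ending with the default.  */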
34853 static int
34854 dispatch_function_versions (tree dispatch_decl,
34855 void *fndecls_p,
34856 basic_block *empty_bb)
34858 tree default_decl;
34859 gimple ifunc_cpu_init_stmt;
34860 gimple_seq gseq;
34861 int ix;
34862 tree ele;
34863 vec<tree> *fndecls;
34864 unsigned int num_versions = 0;
34865 unsigned int actual_versions = 0;
34866 unsigned int i;
34868 struct _function_version_info
34870 tree version_decl;
34871 tree predicate_chain;
34872 unsigned int dispatch_priority;
34873 }*function_version_info;
34875 gcc_assert (dispatch_decl != NULL
34876 && fndecls_p != NULL
34877 && empty_bb != NULL);
34879 /* fndecls_p is actually a vector. */
34880 fndecls = static_cast<vec<tree> *> (fndecls_p);
34882 /* At least one more version other than the default. */
34883 num_versions = fndecls->length ();
34884 gcc_assert (num_versions >= 2);
34886 function_version_info = (struct _function_version_info *)
34887 XNEWVEC (struct _function_version_info, (num_versions - 1));
34889 /* The first version in the vector is the default decl. */
34890 default_decl = (*fndecls)[0];
34892 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34894 gseq = bb_seq (*empty_bb);
34895 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
34896 constructors, so explicitly call __builtin_cpu_init here. */
34897 ifunc_cpu_init_stmt = gimple_build_call_vec (
34898 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34899 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34900 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34901 set_bb_seq (*empty_bb, gseq);
34903 pop_cfun ();
34906 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34908 tree version_decl = ele;
34909 tree predicate_chain = NULL_TREE;
34910 unsigned int priority;
34911 /* Get attribute string, parse it and find the right predicate decl.
34912 The predicate function could be a lengthy combination of many
34913 features, like arch-type and various isa-variants. */
34914 priority = get_builtin_code_for_version (version_decl,
34915 &predicate_chain);
34917 if (predicate_chain == NULL_TREE)
34918 continue;
34920 function_version_info [actual_versions].version_decl = version_decl;
34921 function_version_info [actual_versions].predicate_chain
34922 = predicate_chain;
34923 function_version_info [actual_versions].dispatch_priority = priority;
34924 actual_versions++;
34927 /* Sort the versions according to descending order of dispatch priority. The
34928 priority is based on the ISA. This is not a perfect solution. There
34929 could still be ambiguity. If more than one function version is suitable
34930 to execute, which one should be dispatched? In the future, allow the user
34931 to specify a dispatch priority next to the version. */
34932 qsort (function_version_info, actual_versions,
34933 sizeof (struct _function_version_info), feature_compare);
34935 for (i = 0; i < actual_versions; ++i)
34936 *empty_bb = add_condition_to_bb (dispatch_decl,
34937 function_version_info[i].version_decl,
34938 function_version_info[i].predicate_chain,
34939 *empty_bb);
34941 /* Dispatch the default version at the end. */
34942 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34943 NULL, *empty_bb);
34945 free (function_version_info);
34946 return 0;
34949 /* Comparator function to be used in the qsort routine to sort the
34950 attribute specification strings of "target". */
34952 static int
34953 attr_strcmp (const void *v1, const void *v2)
34955 const char *c1 = *(char *const*)v1;
34956 const char *c2 = *(char *const*)v2;
34957 return strcmp (c1, c2);
34960 /* ARGLIST is the argument to target attribute. This function tokenizes
34961 the comma separated arguments, sorts them and returns a string which
34962 is a unique identifier for the comma separated arguments. It also
34963 replaces non-identifier characters "=,-" with "_". */
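/* Rough example (assuming the tokens sort as shown): the argument list of
   target("sse4.2,arch=core2") would come back as the string
   "arch_core2_sse4.2".  */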
34965 static char *
34966 sorted_attr_string (tree arglist)
34968 tree arg;
34969 size_t str_len_sum = 0;
34970 char **args = NULL;
34971 char *attr_str, *ret_str;
34972 char *attr = NULL;
34973 unsigned int argnum = 1;
34974 unsigned int i;
34976 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34978 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34979 size_t len = strlen (str);
34980 str_len_sum += len + 1;
34981 if (arg != arglist)
34982 argnum++;
34983 for (i = 0; i < strlen (str); i++)
34984 if (str[i] == ',')
34985 argnum++;
34988 attr_str = XNEWVEC (char, str_len_sum);
34989 str_len_sum = 0;
34990 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34992 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34993 size_t len = strlen (str);
34994 memcpy (attr_str + str_len_sum, str, len);
34995 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34996 str_len_sum += len + 1;
34999 /* Replace the non-identifier characters '=' and '-' with '_'. */
35000 for (i = 0; i < strlen (attr_str); i++)
35001 if (attr_str[i] == '=' || attr_str[i]== '-')
35002 attr_str[i] = '_';
35004 if (argnum == 1)
35005 return attr_str;
35007 args = XNEWVEC (char *, argnum);
35009 i = 0;
35010 attr = strtok (attr_str, ",");
35011 while (attr != NULL)
35013 args[i] = attr;
35014 i++;
35015 attr = strtok (NULL, ",");
35018 qsort (args, argnum, sizeof (char *), attr_strcmp);
35020 ret_str = XNEWVEC (char, str_len_sum);
35021 str_len_sum = 0;
35022 for (i = 0; i < argnum; i++)
35024 size_t len = strlen (args[i]);
35025 memcpy (ret_str + str_len_sum, args[i], len);
35026 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
35027 str_len_sum += len + 1;
35030 XDELETEVEC (args);
35031 XDELETEVEC (attr_str);
35032 return ret_str;
35035 /* This function changes the assembler name for functions that are
35036 versions. If DECL is a function version and has a "target"
35037 attribute, it appends the attribute string to its assembler name. */
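/* For example (illustrative): a version of foo declared with target("avx")
   would typically be renamed to "foo.avx", while the version tagged
   "default" keeps the original assembler name.  */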
35039 static tree
35040 ix86_mangle_function_version_assembler_name (tree decl, tree id)
35042 tree version_attr;
35043 const char *orig_name, *version_string;
35044 char *attr_str, *assembler_name;
35046 if (DECL_DECLARED_INLINE_P (decl)
35047 && lookup_attribute ("gnu_inline",
35048 DECL_ATTRIBUTES (decl)))
35049 error_at (DECL_SOURCE_LOCATION (decl),
35050 "Function versions cannot be marked as gnu_inline,"
35051 " bodies have to be generated");
35053 if (DECL_VIRTUAL_P (decl)
35054 || DECL_VINDEX (decl))
35055 sorry ("Virtual function multiversioning not supported");
35057 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35059 /* target attribute string cannot be NULL. */
35060 gcc_assert (version_attr != NULL_TREE);
35062 orig_name = IDENTIFIER_POINTER (id);
35063 version_string
35064 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
35066 if (strcmp (version_string, "default") == 0)
35067 return id;
35069 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
35070 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
35072 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
35074 /* Allow assembler name to be modified if already set. */
35075 if (DECL_ASSEMBLER_NAME_SET_P (decl))
35076 SET_DECL_RTL (decl, NULL);
35078 tree ret = get_identifier (assembler_name);
35079 XDELETEVEC (attr_str);
35080 XDELETEVEC (assembler_name);
35081 return ret;
35084 /* This function returns true if FN1 and FN2 are versions of the same function,
35085 that is, the target strings of the function decls are different. This assumes
35086 that FN1 and FN2 have the same signature. */
35088 static bool
35089 ix86_function_versions (tree fn1, tree fn2)
35091 tree attr1, attr2;
35092 char *target1, *target2;
35093 bool result;
35095 if (TREE_CODE (fn1) != FUNCTION_DECL
35096 || TREE_CODE (fn2) != FUNCTION_DECL)
35097 return false;
35099 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
35100 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
35102 /* At least one function decl should have the target attribute specified. */
35103 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
35104 return false;
35106 /* Diagnose missing target attribute if one of the decls is already
35107 multi-versioned. */
35108 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
35110 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
35112 if (attr2 != NULL_TREE)
35114 std::swap (fn1, fn2);
35115 attr1 = attr2;
35117 error_at (DECL_SOURCE_LOCATION (fn2),
35118 "missing %<target%> attribute for multi-versioned %D",
35119 fn2);
35120 inform (DECL_SOURCE_LOCATION (fn1),
35121 "previous declaration of %D", fn1);
35122 /* Prevent diagnosing of the same error multiple times. */
35123 DECL_ATTRIBUTES (fn2)
35124 = tree_cons (get_identifier ("target"),
35125 copy_node (TREE_VALUE (attr1)),
35126 DECL_ATTRIBUTES (fn2));
35128 return false;
35131 target1 = sorted_attr_string (TREE_VALUE (attr1));
35132 target2 = sorted_attr_string (TREE_VALUE (attr2));
35134 /* The sorted target strings must be different for fn1 and fn2
35135 to be versions. */
35136 if (strcmp (target1, target2) == 0)
35137 result = false;
35138 else
35139 result = true;
35141 XDELETEVEC (target1);
35142 XDELETEVEC (target2);
35144 return result;
35147 static tree
35148 ix86_mangle_decl_assembler_name (tree decl, tree id)
35150 /* For function version, add the target suffix to the assembler name. */
35151 if (TREE_CODE (decl) == FUNCTION_DECL
35152 && DECL_FUNCTION_VERSIONED (decl))
35153 id = ix86_mangle_function_version_assembler_name (decl, id);
35154 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
35155 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
35156 #endif
35158 return id;
35161 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
35162 is true, append the full path name of the source file. */
35164 static char *
35165 make_name (tree decl, const char *suffix, bool make_unique)
35167 char *global_var_name;
35168 int name_len;
35169 const char *name;
35170 const char *unique_name = NULL;
35172 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35174 /* Get a unique name that can be used globally without any chances
35175 of collision at link time. */
35176 if (make_unique)
35177 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
35179 name_len = strlen (name) + strlen (suffix) + 2;
35181 if (make_unique)
35182 name_len += strlen (unique_name) + 1;
35183 global_var_name = XNEWVEC (char, name_len);
35185 /* Use '.' to concatenate names as it is demangler friendly. */
35186 if (make_unique)
35187 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35188 suffix);
35189 else
35190 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35192 return global_var_name;
35195 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35197 /* Make a dispatcher declaration for the multi-versioned function DECL.
35198 Calls to the DECL function will be replaced with calls to the dispatcher
35199 by the front-end. Return the decl created. */
35201 static tree
35202 make_dispatcher_decl (const tree decl)
35204 tree func_decl;
35205 char *func_name;
35206 tree fn_type, func_type;
35207 bool is_uniq = false;
35209 if (TREE_PUBLIC (decl) == 0)
35210 is_uniq = true;
35212 func_name = make_name (decl, "ifunc", is_uniq);
35214 fn_type = TREE_TYPE (decl);
35215 func_type = build_function_type (TREE_TYPE (fn_type),
35216 TYPE_ARG_TYPES (fn_type));
35218 func_decl = build_fn_decl (func_name, func_type);
35219 XDELETEVEC (func_name);
35220 TREE_USED (func_decl) = 1;
35221 DECL_CONTEXT (func_decl) = NULL_TREE;
35222 DECL_INITIAL (func_decl) = error_mark_node;
35223 DECL_ARTIFICIAL (func_decl) = 1;
35224 /* Mark this func as external, the resolver will flip it again if
35225 it gets generated. */
35226 DECL_EXTERNAL (func_decl) = 1;
35227 /* IFUNCs have to be externally visible. */
35228 TREE_PUBLIC (func_decl) = 1;
35230 return func_decl;
35233 #endif
35235 /* Returns true if DECL is multi-versioned and is the default function,
35236 that is, it is not tagged with a target-specific optimization. */
35238 static bool
35239 is_function_default_version (const tree decl)
35241 if (TREE_CODE (decl) != FUNCTION_DECL
35242 || !DECL_FUNCTION_VERSIONED (decl))
35243 return false;
35244 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35245 gcc_assert (attr);
35246 attr = TREE_VALUE (TREE_VALUE (attr));
35247 return (TREE_CODE (attr) == STRING_CST
35248 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35251 /* Make a dispatcher declaration for the multi-versioned function DECL.
35252 Calls to the DECL function will be replaced with calls to the dispatcher
35253 by the front-end. Returns the decl of the dispatcher function. */
35255 static tree
35256 ix86_get_function_versions_dispatcher (void *decl)
35258 tree fn = (tree) decl;
35259 struct cgraph_node *node = NULL;
35260 struct cgraph_node *default_node = NULL;
35261 struct cgraph_function_version_info *node_v = NULL;
35262 struct cgraph_function_version_info *first_v = NULL;
35264 tree dispatch_decl = NULL;
35266 struct cgraph_function_version_info *default_version_info = NULL;
35268 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35270 node = cgraph_node::get (fn);
35271 gcc_assert (node != NULL);
35273 node_v = node->function_version ();
35274 gcc_assert (node_v != NULL);
35276 if (node_v->dispatcher_resolver != NULL)
35277 return node_v->dispatcher_resolver;
35279 /* Find the default version and make it the first node. */
35280 first_v = node_v;
35281 /* Go to the beginning of the chain. */
35282 while (first_v->prev != NULL)
35283 first_v = first_v->prev;
35284 default_version_info = first_v;
35285 while (default_version_info != NULL)
35287 if (is_function_default_version
35288 (default_version_info->this_node->decl))
35289 break;
35290 default_version_info = default_version_info->next;
35293 /* If there is no default node, just return NULL. */
35294 if (default_version_info == NULL)
35295 return NULL;
35297 /* Make default info the first node. */
35298 if (first_v != default_version_info)
35300 default_version_info->prev->next = default_version_info->next;
35301 if (default_version_info->next)
35302 default_version_info->next->prev = default_version_info->prev;
35303 first_v->prev = default_version_info;
35304 default_version_info->next = first_v;
35305 default_version_info->prev = NULL;
35308 default_node = default_version_info->this_node;
35310 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35311 if (targetm.has_ifunc_p ())
35313 struct cgraph_function_version_info *it_v = NULL;
35314 struct cgraph_node *dispatcher_node = NULL;
35315 struct cgraph_function_version_info *dispatcher_version_info = NULL;
35317 /* Right now, the dispatching is done via ifunc. */
35318 dispatch_decl = make_dispatcher_decl (default_node->decl);
35320 dispatcher_node = cgraph_node::get_create (dispatch_decl);
35321 gcc_assert (dispatcher_node != NULL);
35322 dispatcher_node->dispatcher_function = 1;
35323 dispatcher_version_info
35324 = dispatcher_node->insert_new_function_version ();
35325 dispatcher_version_info->next = default_version_info;
35326 dispatcher_node->definition = 1;
35328 /* Set the dispatcher for all the versions. */
35329 it_v = default_version_info;
35330 while (it_v != NULL)
35332 it_v->dispatcher_resolver = dispatch_decl;
35333 it_v = it_v->next;
35336 else
35337 #endif
35339 error_at (DECL_SOURCE_LOCATION (default_node->decl),
35340 "multiversioning needs ifunc which is not supported "
35341 "on this target");
35344 return dispatch_decl;
35347 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
35348 it to CHAIN. */
35350 static tree
35351 make_attribute (const char *name, const char *arg_name, tree chain)
35353 tree attr_name;
35354 tree attr_arg_name;
35355 tree attr_args;
35356 tree attr;
35358 attr_name = get_identifier (name);
35359 attr_arg_name = build_string (strlen (arg_name), arg_name);
35360 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35361 attr = tree_cons (attr_name, attr_args, chain);
35362 return attr;
35365 /* Make the resolver function decl to dispatch the versions of
35366 a multi-versioned function, DEFAULT_DECL. Create an
35367 empty basic block in the resolver and store the pointer in
35368 EMPTY_BB. Return the decl of the resolver function. */
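/* Sketch of the naming (illustrative only): for a public default decl
   named foo, the resolver created here is typically called "foo.resolver",
   and DISPATCH_DECL is marked further down with
   __attribute__ ((ifunc ("foo.resolver"))).  */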
35370 static tree
35371 make_resolver_func (const tree default_decl,
35372 const tree dispatch_decl,
35373 basic_block *empty_bb)
35375 char *resolver_name;
35376 tree decl, type, decl_name, t;
35377 bool is_uniq = false;
35379 /* IFUNCs have to be globally visible. So, if the default_decl is
35380 not, then the name of the IFUNC should be made unique. */
35381 if (TREE_PUBLIC (default_decl) == 0)
35382 is_uniq = true;
35384 /* Append the filename to the resolver function if the versions are
35385 not externally visible. This is because the resolver function has
35386 to be externally visible for the loader to find it. So, appending
35387 the filename will prevent conflicts with a resolver function from
35388 another module which is based on the same version name. */
35389 resolver_name = make_name (default_decl, "resolver", is_uniq);
35391 /* The resolver function should return a (void *). */
35392 type = build_function_type_list (ptr_type_node, NULL_TREE);
35394 decl = build_fn_decl (resolver_name, type);
35395 decl_name = get_identifier (resolver_name);
35396 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35398 DECL_NAME (decl) = decl_name;
35399 TREE_USED (decl) = 1;
35400 DECL_ARTIFICIAL (decl) = 1;
35401 DECL_IGNORED_P (decl) = 0;
35402 /* IFUNC resolvers have to be externally visible. */
35403 TREE_PUBLIC (decl) = 1;
35404 DECL_UNINLINABLE (decl) = 1;
35406 /* Resolver is not external, body is generated. */
35407 DECL_EXTERNAL (decl) = 0;
35408 DECL_EXTERNAL (dispatch_decl) = 0;
35410 DECL_CONTEXT (decl) = NULL_TREE;
35411 DECL_INITIAL (decl) = make_node (BLOCK);
35412 DECL_STATIC_CONSTRUCTOR (decl) = 0;
35414 if (DECL_COMDAT_GROUP (default_decl)
35415 || TREE_PUBLIC (default_decl))
35417 /* In this case, each translation unit with a call to this
35418 versioned function will put out a resolver. Ensure it
35419 is comdat to keep just one copy. */
35420 DECL_COMDAT (decl) = 1;
35421 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35423 /* Build result decl and add to function_decl. */
35424 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35425 DECL_ARTIFICIAL (t) = 1;
35426 DECL_IGNORED_P (t) = 1;
35427 DECL_RESULT (decl) = t;
35429 gimplify_function_tree (decl);
35430 push_cfun (DECL_STRUCT_FUNCTION (decl));
35431 *empty_bb = init_lowered_empty_function (decl, false, 0);
35433 cgraph_node::add_new_function (decl, true);
35434 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35436 pop_cfun ();
35438 gcc_assert (dispatch_decl != NULL);
35439 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
35440 DECL_ATTRIBUTES (dispatch_decl)
35441 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35443 /* Create the alias for dispatch to resolver here. */
35444 /*cgraph_create_function_alias (dispatch_decl, decl);*/
35445 cgraph_node::create_same_body_alias (dispatch_decl, decl);
35446 XDELETEVEC (resolver_name);
35447 return decl;
35450 /* Generate the dispatching code body to dispatch multi-versioned function
35451 DECL. The target hook is called to process the "target" attributes and
35452 provide the code to dispatch the right function at run-time. NODE points
35453 to the dispatcher decl whose body will be created. */
35455 static tree
35456 ix86_generate_version_dispatcher_body (void *node_p)
35458 tree resolver_decl;
35459 basic_block empty_bb;
35460 tree default_ver_decl;
35461 struct cgraph_node *versn;
35462 struct cgraph_node *node;
35464 struct cgraph_function_version_info *node_version_info = NULL;
35465 struct cgraph_function_version_info *versn_info = NULL;
35467 node = (cgraph_node *)node_p;
35469 node_version_info = node->function_version ();
35470 gcc_assert (node->dispatcher_function
35471 && node_version_info != NULL);
35473 if (node_version_info->dispatcher_resolver)
35474 return node_version_info->dispatcher_resolver;
35476 /* The first version in the chain corresponds to the default version. */
35477 default_ver_decl = node_version_info->next->this_node->decl;
35479 /* node is going to be an alias, so remove the finalized bit. */
35480 node->definition = false;
35482 resolver_decl = make_resolver_func (default_ver_decl,
35483 node->decl, &empty_bb);
35485 node_version_info->dispatcher_resolver = resolver_decl;
35487 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35489 auto_vec<tree, 2> fn_ver_vec;
35491 for (versn_info = node_version_info->next; versn_info;
35492 versn_info = versn_info->next)
35494 versn = versn_info->this_node;
35495 /* Check for virtual functions here again, as by this time it should
35496 have been determined if this function needs a vtable index or
35497 not. This happens for methods in derived classes that override
35498 virtual methods in base classes but are not explicitly marked as
35499 virtual. */
35500 if (DECL_VINDEX (versn->decl))
35501 sorry ("Virtual function multiversioning not supported");
35503 fn_ver_vec.safe_push (versn->decl);
35506 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35507 cgraph_edge::rebuild_edges ();
35508 pop_cfun ();
35509 return resolver_decl;
35511 /* This builds the processor_model struct type defined in
35512 libgcc/config/i386/cpuinfo.c */
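/* Roughly, the layout constructed below corresponds to:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };
*/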
35514 static tree
35515 build_processor_model_struct (void)
35517 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35518 "__cpu_features"};
35519 tree field = NULL_TREE, field_chain = NULL_TREE;
35520 int i;
35521 tree type = make_node (RECORD_TYPE);
35523 /* The first 3 fields are unsigned int. */
35524 for (i = 0; i < 3; ++i)
35526 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35527 get_identifier (field_name[i]), unsigned_type_node);
35528 if (field_chain != NULL_TREE)
35529 DECL_CHAIN (field) = field_chain;
35530 field_chain = field;
35533 /* The last field is an array of unsigned integers of size one. */
35534 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35535 get_identifier (field_name[3]),
35536 build_array_type (unsigned_type_node,
35537 build_index_type (size_one_node)));
35538 if (field_chain != NULL_TREE)
35539 DECL_CHAIN (field) = field_chain;
35540 field_chain = field;
35542 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35543 return type;
35546 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
35548 static tree
35549 make_var_decl (tree type, const char *name)
35551 tree new_decl;
35553 new_decl = build_decl (UNKNOWN_LOCATION,
35554 VAR_DECL,
35555 get_identifier(name),
35556 type);
35558 DECL_EXTERNAL (new_decl) = 1;
35559 TREE_STATIC (new_decl) = 1;
35560 TREE_PUBLIC (new_decl) = 1;
35561 DECL_INITIAL (new_decl) = 0;
35562 DECL_ARTIFICIAL (new_decl) = 0;
35563 DECL_PRESERVE_P (new_decl) = 1;
35565 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35566 assemble_variable (new_decl, 0, 0, 0);
35568 return new_decl;
35571 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
35572 into an integer defined in libgcc/config/i386/cpuinfo.c */
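/* For illustration only (a sketch of the folding, not the exact trees):

     __builtin_cpu_is ("amd")       -> (__cpu_model.__cpu_vendor == M_AMD)
     __builtin_cpu_supports ("avx") -> (__cpu_model.__cpu_features[0]
                                        & (1 << F_AVX))

   both converted to an integer result.  */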
35574 static tree
35575 fold_builtin_cpu (tree fndecl, tree *args)
35577 unsigned int i;
35578 enum ix86_builtins fn_code = (enum ix86_builtins)
35579 DECL_FUNCTION_CODE (fndecl);
35580 tree param_string_cst = NULL;
35582 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
35583 enum processor_features
35585 F_CMOV = 0,
35586 F_MMX,
35587 F_POPCNT,
35588 F_SSE,
35589 F_SSE2,
35590 F_SSE3,
35591 F_SSSE3,
35592 F_SSE4_1,
35593 F_SSE4_2,
35594 F_AVX,
35595 F_AVX2,
35596 F_SSE4_A,
35597 F_FMA4,
35598 F_XOP,
35599 F_FMA,
35600 F_AVX512F,
35601 F_BMI,
35602 F_BMI2,
35603 F_AES,
35604 F_MAX
35607 /* These are the values for the vendor types, cpu types and subtypes
35608 in cpuinfo.c. Cpu types and subtypes should have the corresponding
35609 start value subtracted from them. */
35610 enum processor_model
35612 M_INTEL = 1,
35613 M_AMD,
35614 M_CPU_TYPE_START,
35615 M_INTEL_BONNELL,
35616 M_INTEL_CORE2,
35617 M_INTEL_COREI7,
35618 M_AMDFAM10H,
35619 M_AMDFAM15H,
35620 M_INTEL_SILVERMONT,
35621 M_INTEL_KNL,
35622 M_AMD_BTVER1,
35623 M_AMD_BTVER2,
35624 M_CPU_SUBTYPE_START,
35625 M_INTEL_COREI7_NEHALEM,
35626 M_INTEL_COREI7_WESTMERE,
35627 M_INTEL_COREI7_SANDYBRIDGE,
35628 M_AMDFAM10H_BARCELONA,
35629 M_AMDFAM10H_SHANGHAI,
35630 M_AMDFAM10H_ISTANBUL,
35631 M_AMDFAM15H_BDVER1,
35632 M_AMDFAM15H_BDVER2,
35633 M_AMDFAM15H_BDVER3,
35634 M_AMDFAM15H_BDVER4,
35635 M_INTEL_COREI7_IVYBRIDGE,
35636 M_INTEL_COREI7_HASWELL,
35637 M_INTEL_COREI7_BROADWELL
35640 static struct _arch_names_table
35642 const char *const name;
35643 const enum processor_model model;
35645 const arch_names_table[] =
35647 {"amd", M_AMD},
35648 {"intel", M_INTEL},
35649 {"atom", M_INTEL_BONNELL},
35650 {"slm", M_INTEL_SILVERMONT},
35651 {"core2", M_INTEL_CORE2},
35652 {"corei7", M_INTEL_COREI7},
35653 {"nehalem", M_INTEL_COREI7_NEHALEM},
35654 {"westmere", M_INTEL_COREI7_WESTMERE},
35655 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35656 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35657 {"haswell", M_INTEL_COREI7_HASWELL},
35658 {"broadwell", M_INTEL_COREI7_BROADWELL},
35659 {"bonnell", M_INTEL_BONNELL},
35660 {"silvermont", M_INTEL_SILVERMONT},
35661 {"knl", M_INTEL_KNL},
35662 {"amdfam10h", M_AMDFAM10H},
35663 {"barcelona", M_AMDFAM10H_BARCELONA},
35664 {"shanghai", M_AMDFAM10H_SHANGHAI},
35665 {"istanbul", M_AMDFAM10H_ISTANBUL},
35666 {"btver1", M_AMD_BTVER1},
35667 {"amdfam15h", M_AMDFAM15H},
35668 {"bdver1", M_AMDFAM15H_BDVER1},
35669 {"bdver2", M_AMDFAM15H_BDVER2},
35670 {"bdver3", M_AMDFAM15H_BDVER3},
35671 {"bdver4", M_AMDFAM15H_BDVER4},
35672 {"btver2", M_AMD_BTVER2},
35675 static struct _isa_names_table
35677 const char *const name;
35678 const enum processor_features feature;
35680 const isa_names_table[] =
35682 {"cmov", F_CMOV},
35683 {"mmx", F_MMX},
35684 {"popcnt", F_POPCNT},
35685 {"sse", F_SSE},
35686 {"sse2", F_SSE2},
35687 {"sse3", F_SSE3},
35688 {"ssse3", F_SSSE3},
35689 {"sse4a", F_SSE4_A},
35690 {"sse4.1", F_SSE4_1},
35691 {"sse4.2", F_SSE4_2},
35692 {"avx", F_AVX},
35693 {"fma4", F_FMA4},
35694 {"xop", F_XOP},
35695 {"fma", F_FMA},
35696 {"avx2", F_AVX2},
35697 {"avx512f",F_AVX512F},
35698 {"bmi", F_BMI},
35699 {"bmi2", F_BMI2},
35700 {"aes", F_AES}
35703 tree __processor_model_type = build_processor_model_struct ();
35704 tree __cpu_model_var = make_var_decl (__processor_model_type,
35705 "__cpu_model");
35708 varpool_node::add (__cpu_model_var);
35710 gcc_assert ((args != NULL) && (*args != NULL));
35712 param_string_cst = *args;
35713 while (param_string_cst
35714 && TREE_CODE (param_string_cst) != STRING_CST)
35716 /* *args must be an expr that can contain other EXPRs leading to a
35717 STRING_CST. */
35718 if (!EXPR_P (param_string_cst))
35720 error ("Parameter to builtin must be a string constant or literal");
35721 return integer_zero_node;
35723 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35726 gcc_assert (param_string_cst);
35728 if (fn_code == IX86_BUILTIN_CPU_IS)
35730 tree ref;
35731 tree field;
35732 tree final;
35734 unsigned int field_val = 0;
35735 unsigned int NUM_ARCH_NAMES
35736 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35738 for (i = 0; i < NUM_ARCH_NAMES; i++)
35739 if (strcmp (arch_names_table[i].name,
35740 TREE_STRING_POINTER (param_string_cst)) == 0)
35741 break;
35743 if (i == NUM_ARCH_NAMES)
35745 error ("Parameter to builtin not valid: %s",
35746 TREE_STRING_POINTER (param_string_cst));
35747 return integer_zero_node;
35750 field = TYPE_FIELDS (__processor_model_type);
35751 field_val = arch_names_table[i].model;
35753 /* CPU types are stored in the next field. */
35754 if (field_val > M_CPU_TYPE_START
35755 && field_val < M_CPU_SUBTYPE_START)
35757 field = DECL_CHAIN (field);
35758 field_val -= M_CPU_TYPE_START;
35761 /* CPU subtypes are stored in the next field. */
35762 if (field_val > M_CPU_SUBTYPE_START)
35764 field = DECL_CHAIN (DECL_CHAIN (field));
35765 field_val -= M_CPU_SUBTYPE_START;
35768 /* Get the appropriate field in __cpu_model. */
35769 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35770 field, NULL_TREE);
35772 /* Check the value. */
35773 final = build2 (EQ_EXPR, unsigned_type_node, ref,
35774 build_int_cstu (unsigned_type_node, field_val));
35775 return build1 (CONVERT_EXPR, integer_type_node, final);
35777 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35779 tree ref;
35780 tree array_elt;
35781 tree field;
35782 tree final;
35784 unsigned int field_val = 0;
35785 unsigned int NUM_ISA_NAMES
35786 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35788 for (i = 0; i < NUM_ISA_NAMES; i++)
35789 if (strcmp (isa_names_table[i].name,
35790 TREE_STRING_POINTER (param_string_cst)) == 0)
35791 break;
35793 if (i == NUM_ISA_NAMES)
35795 error ("Parameter to builtin not valid: %s",
35796 TREE_STRING_POINTER (param_string_cst));
35797 return integer_zero_node;
35800 field = TYPE_FIELDS (__processor_model_type);
35801 /* Get the last field, which is __cpu_features. */
35802 while (DECL_CHAIN (field))
35803 field = DECL_CHAIN (field);
35805 /* Get the appropriate field: __cpu_model.__cpu_features */
35806 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35807 field, NULL_TREE);
35809 /* Access the 0th element of __cpu_features array. */
35810 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35811 integer_zero_node, NULL_TREE, NULL_TREE);
35813 field_val = (1 << isa_names_table[i].feature);
35814 /* Return __cpu_model.__cpu_features[0] & field_val */
35815 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35816 build_int_cstu (unsigned_type_node, field_val));
35817 return build1 (CONVERT_EXPR, integer_type_node, final);
35819 gcc_unreachable ();
35822 static tree
35823 ix86_fold_builtin (tree fndecl, int n_args,
35824 tree *args, bool ignore ATTRIBUTE_UNUSED)
35826 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35828 enum ix86_builtins fn_code = (enum ix86_builtins)
35829 DECL_FUNCTION_CODE (fndecl);
35830 if (fn_code == IX86_BUILTIN_CPU_IS
35831 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35833 gcc_assert (n_args == 1);
35834 return fold_builtin_cpu (fndecl, args);
35838 #ifdef SUBTARGET_FOLD_BUILTIN
35839 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35840 #endif
35842 return NULL_TREE;
35845 /* Make builtins to detect cpu type and features supported. NAME is
35846 the builtin name, CODE is the builtin code, and FTYPE is the function
35847 type of the builtin. */
35849 static void
35850 make_cpu_type_builtin (const char* name, int code,
35851 enum ix86_builtin_func_type ftype, bool is_const)
35853 tree decl;
35854 tree type;
35856 type = ix86_get_builtin_func_type (ftype);
35857 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35858 NULL, NULL_TREE);
35859 gcc_assert (decl != NULL_TREE);
35860 ix86_builtins[(int) code] = decl;
35861 TREE_READONLY (decl) = is_const;
35864 /* Make builtins to get CPU type and features supported. The created
35865 builtins are:
35867 __builtin_cpu_init (), to detect cpu type and features,
35868 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35869 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35872 static void
35873 ix86_init_platform_type_builtins (void)
35875 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35876 INT_FTYPE_VOID, false);
35877 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35878 INT_FTYPE_PCCHAR, true);
35879 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35880 INT_FTYPE_PCCHAR, true);
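/* Typical user-level usage of the builtins created above (illustrative
   only, not part of this file):

     __builtin_cpu_init ();
     if (__builtin_cpu_is ("corei7") && __builtin_cpu_supports ("avx"))
       use_the_avx_path ();   // use_the_avx_path is a placeholder name
*/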
35883 /* Internal method for ix86_init_builtins. */
35885 static void
35886 ix86_init_builtins_va_builtins_abi (void)
35888 tree ms_va_ref, sysv_va_ref;
35889 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35890 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35891 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35892 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35894 if (!TARGET_64BIT)
35895 return;
35896 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35897 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35898 ms_va_ref = build_reference_type (ms_va_list_type_node);
35899 sysv_va_ref =
35900 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35902 fnvoid_va_end_ms =
35903 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35904 fnvoid_va_start_ms =
35905 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35906 fnvoid_va_end_sysv =
35907 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35908 fnvoid_va_start_sysv =
35909 build_varargs_function_type_list (void_type_node, sysv_va_ref,
35910 NULL_TREE);
35911 fnvoid_va_copy_ms =
35912 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35913 NULL_TREE);
35914 fnvoid_va_copy_sysv =
35915 build_function_type_list (void_type_node, sysv_va_ref,
35916 sysv_va_ref, NULL_TREE);
35918 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35919 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35920 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35921 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35922 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35923 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35924 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35925 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35926 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35927 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35928 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35929 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35932 static void
35933 ix86_init_builtin_types (void)
35935 tree float128_type_node, float80_type_node;
35937 /* The __float80 type. */
35938 float80_type_node = long_double_type_node;
35939 if (TYPE_MODE (float80_type_node) != XFmode)
35941 /* The __float80 type. */
35942 float80_type_node = make_node (REAL_TYPE);
35944 TYPE_PRECISION (float80_type_node) = 80;
35945 layout_type (float80_type_node);
35947 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35949 /* The __float128 type. */
35950 float128_type_node = make_node (REAL_TYPE);
35951 TYPE_PRECISION (float128_type_node) = 128;
35952 layout_type (float128_type_node);
35953 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35955 /* This macro is built by i386-builtin-types.awk. */
35956 DEFINE_BUILTIN_PRIMITIVE_TYPES;
35959 static void
35960 ix86_init_builtins (void)
35962 tree t;
35964 ix86_init_builtin_types ();
35966 /* Builtins to get CPU type and features. */
35967 ix86_init_platform_type_builtins ();
35969 /* TFmode support builtins. */
35970 def_builtin_const (0, "__builtin_infq",
35971 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
35972 def_builtin_const (0, "__builtin_huge_valq",
35973 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
35975 /* We will expand them to normal calls if SSE isn't available since
35976 they are used by libgcc. */
35977 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
35978 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
35979 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35980 TREE_READONLY (t) = 1;
35981 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35983 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35984 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35985 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35986 TREE_READONLY (t) = 1;
35987 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35989 ix86_init_tm_builtins ();
35990 ix86_init_mmx_sse_builtins ();
35991 ix86_init_mpx_builtins ();
35993 if (TARGET_LP64)
35994 ix86_init_builtins_va_builtins_abi ();
35996 #ifdef SUBTARGET_INIT_BUILTINS
35997 SUBTARGET_INIT_BUILTINS;
35998 #endif
36001 /* Return the ix86 builtin for CODE. */
36003 static tree
36004 ix86_builtin_decl (unsigned code, bool)
36006 if (code >= IX86_BUILTIN_MAX)
36007 return error_mark_node;
36009 return ix86_builtins[code];
36012 /* Errors in the source file can cause expand_expr to return const0_rtx
36013 where we expect a vector. To avoid crashing, use one of the vector
36014 clear instructions. */
36015 static rtx
36016 safe_vector_operand (rtx x, machine_mode mode)
36018 if (x == const0_rtx)
36019 x = CONST0_RTX (mode);
36020 return x;
36023 /* Fixup modeless constants to fit required mode. */
36024 static rtx
36025 fixup_modeless_constant (rtx x, machine_mode mode)
36027 if (GET_MODE (x) == VOIDmode)
36028 x = convert_to_mode (mode, x, 1);
36029 return x;
36032 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
36034 static rtx
36035 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
36037 rtx pat;
36038 tree arg0 = CALL_EXPR_ARG (exp, 0);
36039 tree arg1 = CALL_EXPR_ARG (exp, 1);
36040 rtx op0 = expand_normal (arg0);
36041 rtx op1 = expand_normal (arg1);
36042 machine_mode tmode = insn_data[icode].operand[0].mode;
36043 machine_mode mode0 = insn_data[icode].operand[1].mode;
36044 machine_mode mode1 = insn_data[icode].operand[2].mode;
36046 if (VECTOR_MODE_P (mode0))
36047 op0 = safe_vector_operand (op0, mode0);
36048 if (VECTOR_MODE_P (mode1))
36049 op1 = safe_vector_operand (op1, mode1);
36051 if (optimize || !target
36052 || GET_MODE (target) != tmode
36053 || !insn_data[icode].operand[0].predicate (target, tmode))
36054 target = gen_reg_rtx (tmode);
36056 if (GET_MODE (op1) == SImode && mode1 == TImode)
36058 rtx x = gen_reg_rtx (V4SImode);
36059 emit_insn (gen_sse2_loadd (x, op1));
36060 op1 = gen_lowpart (TImode, x);
36063 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36064 op0 = copy_to_mode_reg (mode0, op0);
36065 if (!insn_data[icode].operand[2].predicate (op1, mode1))
36066 op1 = copy_to_mode_reg (mode1, op1);
36068 pat = GEN_FCN (icode) (target, op0, op1);
36069 if (! pat)
36070 return 0;
36072 emit_insn (pat);
36074 return target;
36077 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
36079 static rtx
36080 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
36081 enum ix86_builtin_func_type m_type,
36082 enum rtx_code sub_code)
36084 rtx pat;
36085 int i;
36086 int nargs;
36087 bool comparison_p = false;
36088 bool tf_p = false;
36089 bool last_arg_constant = false;
36090 int num_memory = 0;
36091 struct {
36092 rtx op;
36093 machine_mode mode;
36094 } args[4];
36096 machine_mode tmode = insn_data[icode].operand[0].mode;
36098 switch (m_type)
36100 case MULTI_ARG_4_DF2_DI_I:
36101 case MULTI_ARG_4_DF2_DI_I1:
36102 case MULTI_ARG_4_SF2_SI_I:
36103 case MULTI_ARG_4_SF2_SI_I1:
36104 nargs = 4;
36105 last_arg_constant = true;
36106 break;
36108 case MULTI_ARG_3_SF:
36109 case MULTI_ARG_3_DF:
36110 case MULTI_ARG_3_SF2:
36111 case MULTI_ARG_3_DF2:
36112 case MULTI_ARG_3_DI:
36113 case MULTI_ARG_3_SI:
36114 case MULTI_ARG_3_SI_DI:
36115 case MULTI_ARG_3_HI:
36116 case MULTI_ARG_3_HI_SI:
36117 case MULTI_ARG_3_QI:
36118 case MULTI_ARG_3_DI2:
36119 case MULTI_ARG_3_SI2:
36120 case MULTI_ARG_3_HI2:
36121 case MULTI_ARG_3_QI2:
36122 nargs = 3;
36123 break;
36125 case MULTI_ARG_2_SF:
36126 case MULTI_ARG_2_DF:
36127 case MULTI_ARG_2_DI:
36128 case MULTI_ARG_2_SI:
36129 case MULTI_ARG_2_HI:
36130 case MULTI_ARG_2_QI:
36131 nargs = 2;
36132 break;
36134 case MULTI_ARG_2_DI_IMM:
36135 case MULTI_ARG_2_SI_IMM:
36136 case MULTI_ARG_2_HI_IMM:
36137 case MULTI_ARG_2_QI_IMM:
36138 nargs = 2;
36139 last_arg_constant = true;
36140 break;
36142 case MULTI_ARG_1_SF:
36143 case MULTI_ARG_1_DF:
36144 case MULTI_ARG_1_SF2:
36145 case MULTI_ARG_1_DF2:
36146 case MULTI_ARG_1_DI:
36147 case MULTI_ARG_1_SI:
36148 case MULTI_ARG_1_HI:
36149 case MULTI_ARG_1_QI:
36150 case MULTI_ARG_1_SI_DI:
36151 case MULTI_ARG_1_HI_DI:
36152 case MULTI_ARG_1_HI_SI:
36153 case MULTI_ARG_1_QI_DI:
36154 case MULTI_ARG_1_QI_SI:
36155 case MULTI_ARG_1_QI_HI:
36156 nargs = 1;
36157 break;
36159 case MULTI_ARG_2_DI_CMP:
36160 case MULTI_ARG_2_SI_CMP:
36161 case MULTI_ARG_2_HI_CMP:
36162 case MULTI_ARG_2_QI_CMP:
36163 nargs = 2;
36164 comparison_p = true;
36165 break;
36167 case MULTI_ARG_2_SF_TF:
36168 case MULTI_ARG_2_DF_TF:
36169 case MULTI_ARG_2_DI_TF:
36170 case MULTI_ARG_2_SI_TF:
36171 case MULTI_ARG_2_HI_TF:
36172 case MULTI_ARG_2_QI_TF:
36173 nargs = 2;
36174 tf_p = true;
36175 break;
36177 default:
36178 gcc_unreachable ();
36181 if (optimize || !target
36182 || GET_MODE (target) != tmode
36183 || !insn_data[icode].operand[0].predicate (target, tmode))
36184 target = gen_reg_rtx (tmode);
36186 gcc_assert (nargs <= 4);
36188 for (i = 0; i < nargs; i++)
36190 tree arg = CALL_EXPR_ARG (exp, i);
36191 rtx op = expand_normal (arg);
36192 int adjust = (comparison_p) ? 1 : 0;
36193 machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36195 if (last_arg_constant && i == nargs - 1)
36197 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36199 enum insn_code new_icode = icode;
36200 switch (icode)
36202 case CODE_FOR_xop_vpermil2v2df3:
36203 case CODE_FOR_xop_vpermil2v4sf3:
36204 case CODE_FOR_xop_vpermil2v4df3:
36205 case CODE_FOR_xop_vpermil2v8sf3:
36206 error ("the last argument must be a 2-bit immediate");
36207 return gen_reg_rtx (tmode);
36208 case CODE_FOR_xop_rotlv2di3:
36209 new_icode = CODE_FOR_rotlv2di3;
36210 goto xop_rotl;
36211 case CODE_FOR_xop_rotlv4si3:
36212 new_icode = CODE_FOR_rotlv4si3;
36213 goto xop_rotl;
36214 case CODE_FOR_xop_rotlv8hi3:
36215 new_icode = CODE_FOR_rotlv8hi3;
36216 goto xop_rotl;
36217 case CODE_FOR_xop_rotlv16qi3:
36218 new_icode = CODE_FOR_rotlv16qi3;
36219 xop_rotl:
36220 if (CONST_INT_P (op))
36222 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36223 op = GEN_INT (INTVAL (op) & mask);
36224 gcc_checking_assert
36225 (insn_data[icode].operand[i + 1].predicate (op, mode));
36227 else
36229 gcc_checking_assert
36230 (nargs == 2
36231 && insn_data[new_icode].operand[0].mode == tmode
36232 && insn_data[new_icode].operand[1].mode == tmode
36233 && insn_data[new_icode].operand[2].mode == mode
36234 && insn_data[new_icode].operand[0].predicate
36235 == insn_data[icode].operand[0].predicate
36236 && insn_data[new_icode].operand[1].predicate
36237 == insn_data[icode].operand[1].predicate);
36238 icode = new_icode;
36239 goto non_constant;
36241 break;
36242 default:
36243 gcc_unreachable ();
36247 else
36249 non_constant:
36250 if (VECTOR_MODE_P (mode))
36251 op = safe_vector_operand (op, mode);
36253 /* If we aren't optimizing, only allow one memory operand to be
36254 generated. */
36255 if (memory_operand (op, mode))
36256 num_memory++;
36258 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36260 if (optimize
36261 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36262 || num_memory > 1)
36263 op = force_reg (mode, op);
36266 args[i].op = op;
36267 args[i].mode = mode;
36270 switch (nargs)
36272 case 1:
36273 pat = GEN_FCN (icode) (target, args[0].op);
36274 break;
36276 case 2:
36277 if (tf_p)
36278 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36279 GEN_INT ((int)sub_code));
36280 else if (! comparison_p)
36281 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36282 else
36284 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36285 args[0].op,
36286 args[1].op);
36288 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36290 break;
36292 case 3:
36293 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36294 break;
36296 case 4:
36297 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36298 break;
36300 default:
36301 gcc_unreachable ();
36304 if (! pat)
36305 return 0;
36307 emit_insn (pat);
36308 return target;
36311 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36312 insns with vec_merge. */
36314 static rtx
36315 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36316 rtx target)
36318 rtx pat;
36319 tree arg0 = CALL_EXPR_ARG (exp, 0);
36320 rtx op1, op0 = expand_normal (arg0);
36321 machine_mode tmode = insn_data[icode].operand[0].mode;
36322 machine_mode mode0 = insn_data[icode].operand[1].mode;
36324 if (optimize || !target
36325 || GET_MODE (target) != tmode
36326 || !insn_data[icode].operand[0].predicate (target, tmode))
36327 target = gen_reg_rtx (tmode);
36329 if (VECTOR_MODE_P (mode0))
36330 op0 = safe_vector_operand (op0, mode0);
36332 if ((optimize && !register_operand (op0, mode0))
36333 || !insn_data[icode].operand[1].predicate (op0, mode0))
36334 op0 = copy_to_mode_reg (mode0, op0);
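/* The scalar unop vec_merge patterns use the same value twice: once as the
   operand of the operation and once as the source of the elements that the
   vec_merge leaves untouched, so OP0 is also passed as OP1.  */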
36336 op1 = op0;
36337 if (!insn_data[icode].operand[2].predicate (op1, mode0))
36338 op1 = copy_to_mode_reg (mode0, op1);
36340 pat = GEN_FCN (icode) (target, op0, op1);
36341 if (! pat)
36342 return 0;
36343 emit_insn (pat);
36344 return target;
36347 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
36349 static rtx
36350 ix86_expand_sse_compare (const struct builtin_description *d,
36351 tree exp, rtx target, bool swap)
36353 rtx pat;
36354 tree arg0 = CALL_EXPR_ARG (exp, 0);
36355 tree arg1 = CALL_EXPR_ARG (exp, 1);
36356 rtx op0 = expand_normal (arg0);
36357 rtx op1 = expand_normal (arg1);
36358 rtx op2;
36359 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36360 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36361 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36362 enum rtx_code comparison = d->comparison;
36364 if (VECTOR_MODE_P (mode0))
36365 op0 = safe_vector_operand (op0, mode0);
36366 if (VECTOR_MODE_P (mode1))
36367 op1 = safe_vector_operand (op1, mode1);
36369 /* Swap operands if we have a comparison that isn't available in
36370 hardware. */
36371 if (swap)
36372 std::swap (op0, op1);
36374 if (optimize || !target
36375 || GET_MODE (target) != tmode
36376 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36377 target = gen_reg_rtx (tmode);
36379 if ((optimize && !register_operand (op0, mode0))
36380 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36381 op0 = copy_to_mode_reg (mode0, op0);
36382 if ((optimize && !register_operand (op1, mode1))
36383 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36384 op1 = copy_to_mode_reg (mode1, op1);
36386 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36387 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36388 if (! pat)
36389 return 0;
36390 emit_insn (pat);
36391 return target;
36394 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
36396 static rtx
36397 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36398 rtx target)
36400 rtx pat;
36401 tree arg0 = CALL_EXPR_ARG (exp, 0);
36402 tree arg1 = CALL_EXPR_ARG (exp, 1);
36403 rtx op0 = expand_normal (arg0);
36404 rtx op1 = expand_normal (arg1);
36405 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36406 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36407 enum rtx_code comparison = d->comparison;
36409 if (VECTOR_MODE_P (mode0))
36410 op0 = safe_vector_operand (op0, mode0);
36411 if (VECTOR_MODE_P (mode1))
36412 op1 = safe_vector_operand (op1, mode1);
36414 /* Swap operands if we have a comparison that isn't available in
36415 hardware. */
36416 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36417 std::swap (op0, op1);
36419 target = gen_reg_rtx (SImode);
36420 emit_move_insn (target, const0_rtx);
36421 target = gen_rtx_SUBREG (QImode, target, 0);
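/* TARGET is the QImode low part of a zeroed SImode pseudo; the comi insn
   sets the flags and the STRICT_LOW_PART set emitted below writes the
   comparison result into that low byte, leaving the upper bytes zero.  */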
36423 if ((optimize && !register_operand (op0, mode0))
36424 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36425 op0 = copy_to_mode_reg (mode0, op0);
36426 if ((optimize && !register_operand (op1, mode1))
36427 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36428 op1 = copy_to_mode_reg (mode1, op1);
36430 pat = GEN_FCN (d->icode) (op0, op1);
36431 if (! pat)
36432 return 0;
36433 emit_insn (pat);
36434 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36435 gen_rtx_fmt_ee (comparison, QImode,
36436 SET_DEST (pat),
36437 const0_rtx)));
36439 return SUBREG_REG (target);
36442 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
36444 static rtx
36445 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36446 rtx target)
36448 rtx pat;
36449 tree arg0 = CALL_EXPR_ARG (exp, 0);
36450 rtx op1, op0 = expand_normal (arg0);
36451 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36452 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36454 if (optimize || target == 0
36455 || GET_MODE (target) != tmode
36456 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36457 target = gen_reg_rtx (tmode);
36459 if (VECTOR_MODE_P (mode0))
36460 op0 = safe_vector_operand (op0, mode0);
36462 if ((optimize && !register_operand (op0, mode0))
36463 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36464 op0 = copy_to_mode_reg (mode0, op0);
36466 op1 = GEN_INT (d->comparison);
36468 pat = GEN_FCN (d->icode) (target, op0, op1);
36469 if (! pat)
36470 return 0;
36471 emit_insn (pat);
36472 return target;
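/* Like ix86_expand_sse_round, but for round insns that take two vector
   operands and pack the rounded results into a vector of ints (the
   *_vec_pack_sfix patterns).  The rounding immediate again comes from
   d->comparison.  */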
36475 static rtx
36476 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36477 tree exp, rtx target)
36479 rtx pat;
36480 tree arg0 = CALL_EXPR_ARG (exp, 0);
36481 tree arg1 = CALL_EXPR_ARG (exp, 1);
36482 rtx op0 = expand_normal (arg0);
36483 rtx op1 = expand_normal (arg1);
36484 rtx op2;
36485 machine_mode tmode = insn_data[d->icode].operand[0].mode;
36486 machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36487 machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36489 if (optimize || target == 0
36490 || GET_MODE (target) != tmode
36491 || !insn_data[d->icode].operand[0].predicate (target, tmode))
36492 target = gen_reg_rtx (tmode);
36494 op0 = safe_vector_operand (op0, mode0);
36495 op1 = safe_vector_operand (op1, mode1);
36497 if ((optimize && !register_operand (op0, mode0))
36498 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36499 op0 = copy_to_mode_reg (mode0, op0);
36500 if ((optimize && !register_operand (op1, mode1))
36501 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36502 op1 = copy_to_mode_reg (mode1, op1);
36504 op2 = GEN_INT (d->comparison);
36506 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36507 if (! pat)
36508 return 0;
36509 emit_insn (pat);
36510 return target;
36513 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
36515 static rtx
36516 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36517 rtx target)
36519 rtx pat;
36520 tree arg0 = CALL_EXPR_ARG (exp, 0);
36521 tree arg1 = CALL_EXPR_ARG (exp, 1);
36522 rtx op0 = expand_normal (arg0);
36523 rtx op1 = expand_normal (arg1);
36524 machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36525 machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36526 enum rtx_code comparison = d->comparison;
36528 if (VECTOR_MODE_P (mode0))
36529 op0 = safe_vector_operand (op0, mode0);
36530 if (VECTOR_MODE_P (mode1))
36531 op1 = safe_vector_operand (op1, mode1);
36533 target = gen_reg_rtx (SImode);
36534 emit_move_insn (target, const0_rtx);
36535 target = gen_rtx_SUBREG (QImode, target, 0);
36537 if ((optimize && !register_operand (op0, mode0))
36538 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36539 op0 = copy_to_mode_reg (mode0, op0);
36540 if ((optimize && !register_operand (op1, mode1))
36541 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36542 op1 = copy_to_mode_reg (mode1, op1);
36544 pat = GEN_FCN (d->icode) (op0, op1);
36545 if (! pat)
36546 return 0;
36547 emit_insn (pat);
36548 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36549 gen_rtx_fmt_ee (comparison, QImode,
36550 SET_DEST (pat),
36551 const0_rtx)));
36553 return SUBREG_REG (target);
36556 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
36558 static rtx
36559 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36560 tree exp, rtx target)
36562 rtx pat;
36563 tree arg0 = CALL_EXPR_ARG (exp, 0);
36564 tree arg1 = CALL_EXPR_ARG (exp, 1);
36565 tree arg2 = CALL_EXPR_ARG (exp, 2);
36566 tree arg3 = CALL_EXPR_ARG (exp, 3);
36567 tree arg4 = CALL_EXPR_ARG (exp, 4);
36568 rtx scratch0, scratch1;
36569 rtx op0 = expand_normal (arg0);
36570 rtx op1 = expand_normal (arg1);
36571 rtx op2 = expand_normal (arg2);
36572 rtx op3 = expand_normal (arg3);
36573 rtx op4 = expand_normal (arg4);
36574 machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36576 tmode0 = insn_data[d->icode].operand[0].mode;
36577 tmode1 = insn_data[d->icode].operand[1].mode;
36578 modev2 = insn_data[d->icode].operand[2].mode;
36579 modei3 = insn_data[d->icode].operand[3].mode;
36580 modev4 = insn_data[d->icode].operand[4].mode;
36581 modei5 = insn_data[d->icode].operand[5].mode;
36582 modeimm = insn_data[d->icode].operand[6].mode;
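/* The pcmpestr patterns have two result operands: the index in TMODE0
   (operand 0) and the mask in TMODE1 (operand 1).  Which one the builtin
   returns depends on d->code; the flag-returning variants instead test
   FLAGS_REG after the insn, using d->flag as the flags mode.  */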
36584 if (VECTOR_MODE_P (modev2))
36585 op0 = safe_vector_operand (op0, modev2);
36586 if (VECTOR_MODE_P (modev4))
36587 op2 = safe_vector_operand (op2, modev4);
36589 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36590 op0 = copy_to_mode_reg (modev2, op0);
36591 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36592 op1 = copy_to_mode_reg (modei3, op1);
36593 if ((optimize && !register_operand (op2, modev4))
36594 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36595 op2 = copy_to_mode_reg (modev4, op2);
36596 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36597 op3 = copy_to_mode_reg (modei5, op3);
36599 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36601 error ("the fifth argument must be an 8-bit immediate");
36602 return const0_rtx;
36605 if (d->code == IX86_BUILTIN_PCMPESTRI128)
36607 if (optimize || !target
36608 || GET_MODE (target) != tmode0
36609 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36610 target = gen_reg_rtx (tmode0);
36612 scratch1 = gen_reg_rtx (tmode1);
36614 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36616 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36618 if (optimize || !target
36619 || GET_MODE (target) != tmode1
36620 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36621 target = gen_reg_rtx (tmode1);
36623 scratch0 = gen_reg_rtx (tmode0);
36625 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36627 else
36629 gcc_assert (d->flag);
36631 scratch0 = gen_reg_rtx (tmode0);
36632 scratch1 = gen_reg_rtx (tmode1);
36634 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36637 if (! pat)
36638 return 0;
36640 emit_insn (pat);
36642 if (d->flag)
36644 target = gen_reg_rtx (SImode);
36645 emit_move_insn (target, const0_rtx);
36646 target = gen_rtx_SUBREG (QImode, target, 0);
36648 emit_insn
36649 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36650 gen_rtx_fmt_ee (EQ, QImode,
36651 gen_rtx_REG ((machine_mode) d->flag,
36652 FLAGS_REG),
36653 const0_rtx)));
36654 return SUBREG_REG (target);
36656 else
36657 return target;
36661 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
36663 static rtx
36664 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36665 tree exp, rtx target)
36667 rtx pat;
36668 tree arg0 = CALL_EXPR_ARG (exp, 0);
36669 tree arg1 = CALL_EXPR_ARG (exp, 1);
36670 tree arg2 = CALL_EXPR_ARG (exp, 2);
36671 rtx scratch0, scratch1;
36672 rtx op0 = expand_normal (arg0);
36673 rtx op1 = expand_normal (arg1);
36674 rtx op2 = expand_normal (arg2);
36675 machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36677 tmode0 = insn_data[d->icode].operand[0].mode;
36678 tmode1 = insn_data[d->icode].operand[1].mode;
36679 modev2 = insn_data[d->icode].operand[2].mode;
36680 modev3 = insn_data[d->icode].operand[3].mode;
36681 modeimm = insn_data[d->icode].operand[4].mode;
36683 if (VECTOR_MODE_P (modev2))
36684 op0 = safe_vector_operand (op0, modev2);
36685 if (VECTOR_MODE_P (modev3))
36686 op1 = safe_vector_operand (op1, modev3);
36688 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36689 op0 = copy_to_mode_reg (modev2, op0);
36690 if ((optimize && !register_operand (op1, modev3))
36691 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36692 op1 = copy_to_mode_reg (modev3, op1);
36694 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36696 error ("the third argument must be an 8-bit immediate");
36697 return const0_rtx;
36700 if (d->code == IX86_BUILTIN_PCMPISTRI128)
36702 if (optimize || !target
36703 || GET_MODE (target) != tmode0
36704 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36705 target = gen_reg_rtx (tmode0);
36707 scratch1 = gen_reg_rtx (tmode1);
36709 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36711 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36713 if (optimize || !target
36714 || GET_MODE (target) != tmode1
36715 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36716 target = gen_reg_rtx (tmode1);
36718 scratch0 = gen_reg_rtx (tmode0);
36720 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36722 else
36724 gcc_assert (d->flag);
36726 scratch0 = gen_reg_rtx (tmode0);
36727 scratch1 = gen_reg_rtx (tmode1);
36729 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36732 if (! pat)
36733 return 0;
36735 emit_insn (pat);
36737 if (d->flag)
36739 target = gen_reg_rtx (SImode);
36740 emit_move_insn (target, const0_rtx);
36741 target = gen_rtx_SUBREG (QImode, target, 0);
36743 emit_insn
36744 (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36745 gen_rtx_fmt_ee (EQ, QImode,
36746 gen_rtx_REG ((machine_mode) d->flag,
36747 FLAGS_REG),
36748 const0_rtx)));
36749 return SUBREG_REG (target);
36751 else
36752 return target;
36755 /* Subroutine of ix86_expand_builtin to take care of insns with
36756 a variable number of operands. */
36758 static rtx
36759 ix86_expand_args_builtin (const struct builtin_description *d,
36760 tree exp, rtx target)
36762 rtx pat, real_target;
36763 unsigned int i, nargs;
36764 unsigned int nargs_constant = 0;
36765 unsigned int mask_pos = 0;
36766 int num_memory = 0;
36767 struct
36769 rtx op;
36770 machine_mode mode;
36771 } args[6];
36772 bool last_arg_count = false;
36773 enum insn_code icode = d->icode;
36774 const struct insn_data_d *insn_p = &insn_data[icode];
36775 machine_mode tmode = insn_p->operand[0].mode;
36776 machine_mode rmode = VOIDmode;
36777 bool swap = false;
36778 enum rtx_code comparison = d->comparison;
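/* The switch below classifies the builtin by its function type: NARGS is
   the operand count, the trailing NARGS_CONSTANT operands must be
   immediates, MASK_POS counts the operands (merge source and mask) that
   follow those immediates, and RMODE, when set, is the mode in which the
   result is handed back when it differs from the insn's own mode.  */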
36780 switch ((enum ix86_builtin_func_type) d->flag)
36782 case V2DF_FTYPE_V2DF_ROUND:
36783 case V4DF_FTYPE_V4DF_ROUND:
36784 case V4SF_FTYPE_V4SF_ROUND:
36785 case V8SF_FTYPE_V8SF_ROUND:
36786 case V4SI_FTYPE_V4SF_ROUND:
36787 case V8SI_FTYPE_V8SF_ROUND:
36788 return ix86_expand_sse_round (d, exp, target);
36789 case V4SI_FTYPE_V2DF_V2DF_ROUND:
36790 case V8SI_FTYPE_V4DF_V4DF_ROUND:
36791 case V16SI_FTYPE_V8DF_V8DF_ROUND:
36792 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36793 case INT_FTYPE_V8SF_V8SF_PTEST:
36794 case INT_FTYPE_V4DI_V4DI_PTEST:
36795 case INT_FTYPE_V4DF_V4DF_PTEST:
36796 case INT_FTYPE_V4SF_V4SF_PTEST:
36797 case INT_FTYPE_V2DI_V2DI_PTEST:
36798 case INT_FTYPE_V2DF_V2DF_PTEST:
36799 return ix86_expand_sse_ptest (d, exp, target);
36800 case FLOAT128_FTYPE_FLOAT128:
36801 case FLOAT_FTYPE_FLOAT:
36802 case INT_FTYPE_INT:
36803 case UINT64_FTYPE_INT:
36804 case UINT16_FTYPE_UINT16:
36805 case INT64_FTYPE_INT64:
36806 case INT64_FTYPE_V4SF:
36807 case INT64_FTYPE_V2DF:
36808 case INT_FTYPE_V16QI:
36809 case INT_FTYPE_V8QI:
36810 case INT_FTYPE_V8SF:
36811 case INT_FTYPE_V4DF:
36812 case INT_FTYPE_V4SF:
36813 case INT_FTYPE_V2DF:
36814 case INT_FTYPE_V32QI:
36815 case V16QI_FTYPE_V16QI:
36816 case V8SI_FTYPE_V8SF:
36817 case V8SI_FTYPE_V4SI:
36818 case V8HI_FTYPE_V8HI:
36819 case V8HI_FTYPE_V16QI:
36820 case V8QI_FTYPE_V8QI:
36821 case V8SF_FTYPE_V8SF:
36822 case V8SF_FTYPE_V8SI:
36823 case V8SF_FTYPE_V4SF:
36824 case V8SF_FTYPE_V8HI:
36825 case V4SI_FTYPE_V4SI:
36826 case V4SI_FTYPE_V16QI:
36827 case V4SI_FTYPE_V4SF:
36828 case V4SI_FTYPE_V8SI:
36829 case V4SI_FTYPE_V8HI:
36830 case V4SI_FTYPE_V4DF:
36831 case V4SI_FTYPE_V2DF:
36832 case V4HI_FTYPE_V4HI:
36833 case V4DF_FTYPE_V4DF:
36834 case V4DF_FTYPE_V4SI:
36835 case V4DF_FTYPE_V4SF:
36836 case V4DF_FTYPE_V2DF:
36837 case V4SF_FTYPE_V4SF:
36838 case V4SF_FTYPE_V4SI:
36839 case V4SF_FTYPE_V8SF:
36840 case V4SF_FTYPE_V4DF:
36841 case V4SF_FTYPE_V8HI:
36842 case V4SF_FTYPE_V2DF:
36843 case V2DI_FTYPE_V2DI:
36844 case V2DI_FTYPE_V16QI:
36845 case V2DI_FTYPE_V8HI:
36846 case V2DI_FTYPE_V4SI:
36847 case V2DF_FTYPE_V2DF:
36848 case V2DF_FTYPE_V4SI:
36849 case V2DF_FTYPE_V4DF:
36850 case V2DF_FTYPE_V4SF:
36851 case V2DF_FTYPE_V2SI:
36852 case V2SI_FTYPE_V2SI:
36853 case V2SI_FTYPE_V4SF:
36854 case V2SI_FTYPE_V2SF:
36855 case V2SI_FTYPE_V2DF:
36856 case V2SF_FTYPE_V2SF:
36857 case V2SF_FTYPE_V2SI:
36858 case V32QI_FTYPE_V32QI:
36859 case V32QI_FTYPE_V16QI:
36860 case V16HI_FTYPE_V16HI:
36861 case V16HI_FTYPE_V8HI:
36862 case V8SI_FTYPE_V8SI:
36863 case V16HI_FTYPE_V16QI:
36864 case V8SI_FTYPE_V16QI:
36865 case V4DI_FTYPE_V16QI:
36866 case V8SI_FTYPE_V8HI:
36867 case V4DI_FTYPE_V8HI:
36868 case V4DI_FTYPE_V4SI:
36869 case V4DI_FTYPE_V2DI:
36870 case HI_FTYPE_HI:
36871 case HI_FTYPE_V16QI:
36872 case SI_FTYPE_V32QI:
36873 case DI_FTYPE_V64QI:
36874 case V16QI_FTYPE_HI:
36875 case V32QI_FTYPE_SI:
36876 case V64QI_FTYPE_DI:
36877 case V8HI_FTYPE_QI:
36878 case V16HI_FTYPE_HI:
36879 case V32HI_FTYPE_SI:
36880 case V4SI_FTYPE_QI:
36881 case V8SI_FTYPE_QI:
36882 case V4SI_FTYPE_HI:
36883 case V8SI_FTYPE_HI:
36884 case QI_FTYPE_V8HI:
36885 case HI_FTYPE_V16HI:
36886 case SI_FTYPE_V32HI:
36887 case QI_FTYPE_V4SI:
36888 case QI_FTYPE_V8SI:
36889 case HI_FTYPE_V16SI:
36890 case QI_FTYPE_V2DI:
36891 case QI_FTYPE_V4DI:
36892 case QI_FTYPE_V8DI:
36893 case UINT_FTYPE_V2DF:
36894 case UINT_FTYPE_V4SF:
36895 case UINT64_FTYPE_V2DF:
36896 case UINT64_FTYPE_V4SF:
36897 case V16QI_FTYPE_V8DI:
36898 case V16HI_FTYPE_V16SI:
36899 case V16SI_FTYPE_HI:
36900 case V2DI_FTYPE_QI:
36901 case V4DI_FTYPE_QI:
36902 case V16SI_FTYPE_V16SI:
36903 case V16SI_FTYPE_INT:
36904 case V16SF_FTYPE_FLOAT:
36905 case V16SF_FTYPE_V8SF:
36906 case V16SI_FTYPE_V8SI:
36907 case V16SF_FTYPE_V4SF:
36908 case V16SI_FTYPE_V4SI:
36909 case V16SF_FTYPE_V16SF:
36910 case V8HI_FTYPE_V8DI:
36911 case V8UHI_FTYPE_V8UHI:
36912 case V8SI_FTYPE_V8DI:
36913 case V8SF_FTYPE_V8DF:
36914 case V8DI_FTYPE_QI:
36915 case V8DI_FTYPE_INT64:
36916 case V8DI_FTYPE_V4DI:
36917 case V8DI_FTYPE_V8DI:
36918 case V8DF_FTYPE_DOUBLE:
36919 case V8DF_FTYPE_V4DF:
36920 case V8DF_FTYPE_V2DF:
36921 case V8DF_FTYPE_V8DF:
36922 case V8DF_FTYPE_V8SI:
36923 nargs = 1;
36924 break;
36925 case V4SF_FTYPE_V4SF_VEC_MERGE:
36926 case V2DF_FTYPE_V2DF_VEC_MERGE:
36927 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36928 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36929 case V16QI_FTYPE_V16QI_V16QI:
36930 case V16QI_FTYPE_V8HI_V8HI:
36931 case V16SI_FTYPE_V16SI_V16SI:
36932 case V16SF_FTYPE_V16SF_V16SF:
36933 case V16SF_FTYPE_V16SF_V16SI:
36934 case V8QI_FTYPE_V8QI_V8QI:
36935 case V8QI_FTYPE_V4HI_V4HI:
36936 case V8HI_FTYPE_V8HI_V8HI:
36937 case V8HI_FTYPE_V16QI_V16QI:
36938 case V8HI_FTYPE_V4SI_V4SI:
36939 case V8SF_FTYPE_V8SF_V8SF:
36940 case V8SF_FTYPE_V8SF_V8SI:
36941 case V8DI_FTYPE_V8DI_V8DI:
36942 case V8DF_FTYPE_V8DF_V8DF:
36943 case V8DF_FTYPE_V8DF_V8DI:
36944 case V4SI_FTYPE_V4SI_V4SI:
36945 case V4SI_FTYPE_V8HI_V8HI:
36946 case V4SI_FTYPE_V4SF_V4SF:
36947 case V4SI_FTYPE_V2DF_V2DF:
36948 case V4HI_FTYPE_V4HI_V4HI:
36949 case V4HI_FTYPE_V8QI_V8QI:
36950 case V4HI_FTYPE_V2SI_V2SI:
36951 case V4DF_FTYPE_V4DF_V4DF:
36952 case V4DF_FTYPE_V4DF_V4DI:
36953 case V4SF_FTYPE_V4SF_V4SF:
36954 case V4SF_FTYPE_V4SF_V4SI:
36955 case V4SF_FTYPE_V4SF_V2SI:
36956 case V4SF_FTYPE_V4SF_V2DF:
36957 case V4SF_FTYPE_V4SF_UINT:
36958 case V4SF_FTYPE_V4SF_UINT64:
36959 case V4SF_FTYPE_V4SF_DI:
36960 case V4SF_FTYPE_V4SF_SI:
36961 case V2DI_FTYPE_V2DI_V2DI:
36962 case V2DI_FTYPE_V16QI_V16QI:
36963 case V2DI_FTYPE_V4SI_V4SI:
36964 case V2UDI_FTYPE_V4USI_V4USI:
36965 case V2DI_FTYPE_V2DI_V16QI:
36966 case V2DI_FTYPE_V2DF_V2DF:
36967 case V2SI_FTYPE_V2SI_V2SI:
36968 case V2SI_FTYPE_V4HI_V4HI:
36969 case V2SI_FTYPE_V2SF_V2SF:
36970 case V2DF_FTYPE_V2DF_V2DF:
36971 case V2DF_FTYPE_V2DF_V4SF:
36972 case V2DF_FTYPE_V2DF_V2DI:
36973 case V2DF_FTYPE_V2DF_DI:
36974 case V2DF_FTYPE_V2DF_SI:
36975 case V2DF_FTYPE_V2DF_UINT:
36976 case V2DF_FTYPE_V2DF_UINT64:
36977 case V2SF_FTYPE_V2SF_V2SF:
36978 case V1DI_FTYPE_V1DI_V1DI:
36979 case V1DI_FTYPE_V8QI_V8QI:
36980 case V1DI_FTYPE_V2SI_V2SI:
36981 case V32QI_FTYPE_V16HI_V16HI:
36982 case V16HI_FTYPE_V8SI_V8SI:
36983 case V32QI_FTYPE_V32QI_V32QI:
36984 case V16HI_FTYPE_V32QI_V32QI:
36985 case V16HI_FTYPE_V16HI_V16HI:
36986 case V8SI_FTYPE_V4DF_V4DF:
36987 case V8SI_FTYPE_V8SI_V8SI:
36988 case V8SI_FTYPE_V16HI_V16HI:
36989 case V4DI_FTYPE_V4DI_V4DI:
36990 case V4DI_FTYPE_V8SI_V8SI:
36991 case V4UDI_FTYPE_V8USI_V8USI:
36992 case QI_FTYPE_V8DI_V8DI:
36993 case V8DI_FTYPE_V64QI_V64QI:
36994 case HI_FTYPE_V16SI_V16SI:
36995 if (comparison == UNKNOWN)
36996 return ix86_expand_binop_builtin (icode, exp, target);
36997 nargs = 2;
36998 break;
36999 case V4SF_FTYPE_V4SF_V4SF_SWAP:
37000 case V2DF_FTYPE_V2DF_V2DF_SWAP:
37001 gcc_assert (comparison != UNKNOWN);
37002 nargs = 2;
37003 swap = true;
37004 break;
37005 case V16HI_FTYPE_V16HI_V8HI_COUNT:
37006 case V16HI_FTYPE_V16HI_SI_COUNT:
37007 case V8SI_FTYPE_V8SI_V4SI_COUNT:
37008 case V8SI_FTYPE_V8SI_SI_COUNT:
37009 case V4DI_FTYPE_V4DI_V2DI_COUNT:
37010 case V4DI_FTYPE_V4DI_INT_COUNT:
37011 case V8HI_FTYPE_V8HI_V8HI_COUNT:
37012 case V8HI_FTYPE_V8HI_SI_COUNT:
37013 case V4SI_FTYPE_V4SI_V4SI_COUNT:
37014 case V4SI_FTYPE_V4SI_SI_COUNT:
37015 case V4HI_FTYPE_V4HI_V4HI_COUNT:
37016 case V4HI_FTYPE_V4HI_SI_COUNT:
37017 case V2DI_FTYPE_V2DI_V2DI_COUNT:
37018 case V2DI_FTYPE_V2DI_SI_COUNT:
37019 case V2SI_FTYPE_V2SI_V2SI_COUNT:
37020 case V2SI_FTYPE_V2SI_SI_COUNT:
37021 case V1DI_FTYPE_V1DI_V1DI_COUNT:
37022 case V1DI_FTYPE_V1DI_SI_COUNT:
37023 nargs = 2;
37024 last_arg_count = true;
37025 break;
37026 case UINT64_FTYPE_UINT64_UINT64:
37027 case UINT_FTYPE_UINT_UINT:
37028 case UINT_FTYPE_UINT_USHORT:
37029 case UINT_FTYPE_UINT_UCHAR:
37030 case UINT16_FTYPE_UINT16_INT:
37031 case UINT8_FTYPE_UINT8_INT:
37032 case HI_FTYPE_HI_HI:
37033 case SI_FTYPE_SI_SI:
37034 case DI_FTYPE_DI_DI:
37035 case V16SI_FTYPE_V8DF_V8DF:
37036 nargs = 2;
37037 break;
37038 case V2DI_FTYPE_V2DI_INT_CONVERT:
37039 nargs = 2;
37040 rmode = V1TImode;
37041 nargs_constant = 1;
37042 break;
37043 case V4DI_FTYPE_V4DI_INT_CONVERT:
37044 nargs = 2;
37045 rmode = V2TImode;
37046 nargs_constant = 1;
37047 break;
37048 case V8DI_FTYPE_V8DI_INT_CONVERT:
37049 nargs = 2;
37050 rmode = V4TImode;
37051 nargs_constant = 1;
37052 break;
37053 case V8HI_FTYPE_V8HI_INT:
37054 case V8HI_FTYPE_V8SF_INT:
37055 case V16HI_FTYPE_V16SF_INT:
37056 case V8HI_FTYPE_V4SF_INT:
37057 case V8SF_FTYPE_V8SF_INT:
37058 case V4SF_FTYPE_V16SF_INT:
37059 case V16SF_FTYPE_V16SF_INT:
37060 case V4SI_FTYPE_V4SI_INT:
37061 case V4SI_FTYPE_V8SI_INT:
37062 case V4HI_FTYPE_V4HI_INT:
37063 case V4DF_FTYPE_V4DF_INT:
37064 case V4DF_FTYPE_V8DF_INT:
37065 case V4SF_FTYPE_V4SF_INT:
37066 case V4SF_FTYPE_V8SF_INT:
37067 case V2DI_FTYPE_V2DI_INT:
37068 case V2DF_FTYPE_V2DF_INT:
37069 case V2DF_FTYPE_V4DF_INT:
37070 case V16HI_FTYPE_V16HI_INT:
37071 case V8SI_FTYPE_V8SI_INT:
37072 case V16SI_FTYPE_V16SI_INT:
37073 case V4SI_FTYPE_V16SI_INT:
37074 case V4DI_FTYPE_V4DI_INT:
37075 case V2DI_FTYPE_V4DI_INT:
37076 case V4DI_FTYPE_V8DI_INT:
37077 case HI_FTYPE_HI_INT:
37078 case QI_FTYPE_V4SF_INT:
37079 case QI_FTYPE_V2DF_INT:
37080 nargs = 2;
37081 nargs_constant = 1;
37082 break;
37083 case V16QI_FTYPE_V16QI_V16QI_V16QI:
37084 case V8SF_FTYPE_V8SF_V8SF_V8SF:
37085 case V4DF_FTYPE_V4DF_V4DF_V4DF:
37086 case V4SF_FTYPE_V4SF_V4SF_V4SF:
37087 case V2DF_FTYPE_V2DF_V2DF_V2DF:
37088 case V32QI_FTYPE_V32QI_V32QI_V32QI:
37089 case HI_FTYPE_V16SI_V16SI_HI:
37090 case QI_FTYPE_V8DI_V8DI_QI:
37091 case V16HI_FTYPE_V16SI_V16HI_HI:
37092 case V16QI_FTYPE_V16SI_V16QI_HI:
37093 case V16QI_FTYPE_V8DI_V16QI_QI:
37094 case V16SF_FTYPE_V16SF_V16SF_HI:
37095 case V16SF_FTYPE_V16SF_V16SF_V16SF:
37096 case V16SF_FTYPE_V16SF_V16SI_V16SF:
37097 case V16SF_FTYPE_V16SI_V16SF_HI:
37098 case V16SF_FTYPE_V16SI_V16SF_V16SF:
37099 case V16SF_FTYPE_V4SF_V16SF_HI:
37100 case V16SI_FTYPE_SI_V16SI_HI:
37101 case V16SI_FTYPE_V16HI_V16SI_HI:
37102 case V16SI_FTYPE_V16QI_V16SI_HI:
37103 case V16SI_FTYPE_V16SF_V16SI_HI:
37104 case V8SF_FTYPE_V4SF_V8SF_QI:
37105 case V4DF_FTYPE_V2DF_V4DF_QI:
37106 case V8SI_FTYPE_V4SI_V8SI_QI:
37107 case V8SI_FTYPE_SI_V8SI_QI:
37108 case V4SI_FTYPE_V4SI_V4SI_QI:
37109 case V4SI_FTYPE_SI_V4SI_QI:
37110 case V4DI_FTYPE_V2DI_V4DI_QI:
37111 case V4DI_FTYPE_DI_V4DI_QI:
37112 case V2DI_FTYPE_V2DI_V2DI_QI:
37113 case V2DI_FTYPE_DI_V2DI_QI:
37114 case V64QI_FTYPE_V64QI_V64QI_DI:
37115 case V64QI_FTYPE_V16QI_V64QI_DI:
37116 case V64QI_FTYPE_QI_V64QI_DI:
37117 case V32QI_FTYPE_V32QI_V32QI_SI:
37118 case V32QI_FTYPE_V16QI_V32QI_SI:
37119 case V32QI_FTYPE_QI_V32QI_SI:
37120 case V16QI_FTYPE_V16QI_V16QI_HI:
37121 case V16QI_FTYPE_QI_V16QI_HI:
37122 case V32HI_FTYPE_V8HI_V32HI_SI:
37123 case V32HI_FTYPE_HI_V32HI_SI:
37124 case V16HI_FTYPE_V8HI_V16HI_HI:
37125 case V16HI_FTYPE_HI_V16HI_HI:
37126 case V8HI_FTYPE_V8HI_V8HI_QI:
37127 case V8HI_FTYPE_HI_V8HI_QI:
37128 case V8SF_FTYPE_V8HI_V8SF_QI:
37129 case V4SF_FTYPE_V8HI_V4SF_QI:
37130 case V8SI_FTYPE_V8SF_V8SI_QI:
37131 case V4SI_FTYPE_V4SF_V4SI_QI:
37132 case V8DI_FTYPE_V8SF_V8DI_QI:
37133 case V4DI_FTYPE_V4SF_V4DI_QI:
37134 case V2DI_FTYPE_V4SF_V2DI_QI:
37135 case V8SF_FTYPE_V8DI_V8SF_QI:
37136 case V4SF_FTYPE_V4DI_V4SF_QI:
37137 case V4SF_FTYPE_V2DI_V4SF_QI:
37138 case V8DF_FTYPE_V8DI_V8DF_QI:
37139 case V4DF_FTYPE_V4DI_V4DF_QI:
37140 case V2DF_FTYPE_V2DI_V2DF_QI:
37141 case V16QI_FTYPE_V8HI_V16QI_QI:
37142 case V16QI_FTYPE_V16HI_V16QI_HI:
37143 case V16QI_FTYPE_V4SI_V16QI_QI:
37144 case V16QI_FTYPE_V8SI_V16QI_QI:
37145 case V8HI_FTYPE_V4SI_V8HI_QI:
37146 case V8HI_FTYPE_V8SI_V8HI_QI:
37147 case V16QI_FTYPE_V2DI_V16QI_QI:
37148 case V16QI_FTYPE_V4DI_V16QI_QI:
37149 case V8HI_FTYPE_V2DI_V8HI_QI:
37150 case V8HI_FTYPE_V4DI_V8HI_QI:
37151 case V4SI_FTYPE_V2DI_V4SI_QI:
37152 case V4SI_FTYPE_V4DI_V4SI_QI:
37153 case V32QI_FTYPE_V32HI_V32QI_SI:
37154 case HI_FTYPE_V16QI_V16QI_HI:
37155 case SI_FTYPE_V32QI_V32QI_SI:
37156 case DI_FTYPE_V64QI_V64QI_DI:
37157 case QI_FTYPE_V8HI_V8HI_QI:
37158 case HI_FTYPE_V16HI_V16HI_HI:
37159 case SI_FTYPE_V32HI_V32HI_SI:
37160 case QI_FTYPE_V4SI_V4SI_QI:
37161 case QI_FTYPE_V8SI_V8SI_QI:
37162 case QI_FTYPE_V2DI_V2DI_QI:
37163 case QI_FTYPE_V4DI_V4DI_QI:
37164 case V4SF_FTYPE_V2DF_V4SF_QI:
37165 case V4SF_FTYPE_V4DF_V4SF_QI:
37166 case V16SI_FTYPE_V16SI_V16SI_HI:
37167 case V16SI_FTYPE_V16SI_V16SI_V16SI:
37168 case V16SI_FTYPE_V4SI_V16SI_HI:
37169 case V2DI_FTYPE_V2DI_V2DI_V2DI:
37170 case V2DI_FTYPE_V4SI_V2DI_QI:
37171 case V2DI_FTYPE_V8HI_V2DI_QI:
37172 case V2DI_FTYPE_V16QI_V2DI_QI:
37173 case V4DI_FTYPE_V4DI_V4DI_QI:
37174 case V4DI_FTYPE_V4SI_V4DI_QI:
37175 case V4DI_FTYPE_V8HI_V4DI_QI:
37176 case V4DI_FTYPE_V16QI_V4DI_QI:
37177 case V8DI_FTYPE_V8DF_V8DI_QI:
37178 case V4DI_FTYPE_V4DF_V4DI_QI:
37179 case V2DI_FTYPE_V2DF_V2DI_QI:
37180 case V4SI_FTYPE_V4DF_V4SI_QI:
37181 case V4SI_FTYPE_V2DF_V4SI_QI:
37182 case V4SI_FTYPE_V8HI_V4SI_QI:
37183 case V4SI_FTYPE_V16QI_V4SI_QI:
37184 case V8SI_FTYPE_V8SI_V8SI_V8SI:
37185 case V4DI_FTYPE_V4DI_V4DI_V4DI:
37186 case V8DF_FTYPE_V2DF_V8DF_QI:
37187 case V8DF_FTYPE_V4DF_V8DF_QI:
37188 case V8DF_FTYPE_V8DF_V8DF_QI:
37189 case V8DF_FTYPE_V8DF_V8DF_V8DF:
37190 case V8SF_FTYPE_V8SF_V8SF_QI:
37191 case V8SF_FTYPE_V8SI_V8SF_QI:
37192 case V4DF_FTYPE_V4DF_V4DF_QI:
37193 case V4SF_FTYPE_V4SF_V4SF_QI:
37194 case V2DF_FTYPE_V2DF_V2DF_QI:
37195 case V2DF_FTYPE_V4SF_V2DF_QI:
37196 case V2DF_FTYPE_V4SI_V2DF_QI:
37197 case V4SF_FTYPE_V4SI_V4SF_QI:
37198 case V4DF_FTYPE_V4SF_V4DF_QI:
37199 case V4DF_FTYPE_V4SI_V4DF_QI:
37200 case V8SI_FTYPE_V8SI_V8SI_QI:
37201 case V8SI_FTYPE_V8HI_V8SI_QI:
37202 case V8SI_FTYPE_V16QI_V8SI_QI:
37203 case V8DF_FTYPE_V8DF_V8DI_V8DF:
37204 case V8DF_FTYPE_V8DI_V8DF_V8DF:
37205 case V8DF_FTYPE_V8SF_V8DF_QI:
37206 case V8DF_FTYPE_V8SI_V8DF_QI:
37207 case V8DI_FTYPE_DI_V8DI_QI:
37208 case V16SF_FTYPE_V8SF_V16SF_HI:
37209 case V16SI_FTYPE_V8SI_V16SI_HI:
37210 case V16HI_FTYPE_V16HI_V16HI_HI:
37211 case V8HI_FTYPE_V16QI_V8HI_QI:
37212 case V16HI_FTYPE_V16QI_V16HI_HI:
37213 case V32HI_FTYPE_V32HI_V32HI_SI:
37214 case V32HI_FTYPE_V32QI_V32HI_SI:
37215 case V8DI_FTYPE_V16QI_V8DI_QI:
37216 case V8DI_FTYPE_V2DI_V8DI_QI:
37217 case V8DI_FTYPE_V4DI_V8DI_QI:
37218 case V8DI_FTYPE_V8DI_V8DI_QI:
37219 case V8DI_FTYPE_V8DI_V8DI_V8DI:
37220 case V8DI_FTYPE_V8HI_V8DI_QI:
37221 case V8DI_FTYPE_V8SI_V8DI_QI:
37222 case V8HI_FTYPE_V8DI_V8HI_QI:
37223 case V8SF_FTYPE_V8DF_V8SF_QI:
37224 case V8SI_FTYPE_V8DF_V8SI_QI:
37225 case V8SI_FTYPE_V8DI_V8SI_QI:
37226 case V4SI_FTYPE_V4SI_V4SI_V4SI:
37227 nargs = 3;
37228 break;
37229 case V32QI_FTYPE_V32QI_V32QI_INT:
37230 case V16HI_FTYPE_V16HI_V16HI_INT:
37231 case V16QI_FTYPE_V16QI_V16QI_INT:
37232 case V4DI_FTYPE_V4DI_V4DI_INT:
37233 case V8HI_FTYPE_V8HI_V8HI_INT:
37234 case V8SI_FTYPE_V8SI_V8SI_INT:
37235 case V8SI_FTYPE_V8SI_V4SI_INT:
37236 case V8SF_FTYPE_V8SF_V8SF_INT:
37237 case V8SF_FTYPE_V8SF_V4SF_INT:
37238 case V4SI_FTYPE_V4SI_V4SI_INT:
37239 case V4DF_FTYPE_V4DF_V4DF_INT:
37240 case V16SF_FTYPE_V16SF_V16SF_INT:
37241 case V16SF_FTYPE_V16SF_V4SF_INT:
37242 case V16SI_FTYPE_V16SI_V4SI_INT:
37243 case V4DF_FTYPE_V4DF_V2DF_INT:
37244 case V4SF_FTYPE_V4SF_V4SF_INT:
37245 case V2DI_FTYPE_V2DI_V2DI_INT:
37246 case V4DI_FTYPE_V4DI_V2DI_INT:
37247 case V2DF_FTYPE_V2DF_V2DF_INT:
37248 case QI_FTYPE_V8DI_V8DI_INT:
37249 case QI_FTYPE_V8DF_V8DF_INT:
37250 case QI_FTYPE_V2DF_V2DF_INT:
37251 case QI_FTYPE_V4SF_V4SF_INT:
37252 case HI_FTYPE_V16SI_V16SI_INT:
37253 case HI_FTYPE_V16SF_V16SF_INT:
37254 nargs = 3;
37255 nargs_constant = 1;
37256 break;
37257 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37258 nargs = 3;
37259 rmode = V4DImode;
37260 nargs_constant = 1;
37261 break;
37262 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37263 nargs = 3;
37264 rmode = V2DImode;
37265 nargs_constant = 1;
37266 break;
37267 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37268 nargs = 3;
37269 rmode = DImode;
37270 nargs_constant = 1;
37271 break;
37272 case V2DI_FTYPE_V2DI_UINT_UINT:
37273 nargs = 3;
37274 nargs_constant = 2;
37275 break;
37276 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37277 nargs = 3;
37278 rmode = V8DImode;
37279 nargs_constant = 1;
37280 break;
37281 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37282 nargs = 5;
37283 rmode = V8DImode;
37284 mask_pos = 2;
37285 nargs_constant = 1;
37286 break;
37287 case QI_FTYPE_V8DF_INT_QI:
37288 case QI_FTYPE_V4DF_INT_QI:
37289 case QI_FTYPE_V2DF_INT_QI:
37290 case HI_FTYPE_V16SF_INT_HI:
37291 case QI_FTYPE_V8SF_INT_QI:
37292 case QI_FTYPE_V4SF_INT_QI:
37293 nargs = 3;
37294 mask_pos = 1;
37295 nargs_constant = 1;
37296 break;
37297 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37298 nargs = 5;
37299 rmode = V4DImode;
37300 mask_pos = 2;
37301 nargs_constant = 1;
37302 break;
37303 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37304 nargs = 5;
37305 rmode = V2DImode;
37306 mask_pos = 2;
37307 nargs_constant = 1;
37308 break;
37309 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37310 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37311 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37312 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37313 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37314 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37315 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37316 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37317 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37318 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37319 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37320 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37321 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37322 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37323 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37324 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37325 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37326 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37327 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37328 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37329 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37330 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37331 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37332 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37333 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37334 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37335 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37336 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37337 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37338 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37339 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37340 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37341 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37342 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37343 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37344 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37345 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37346 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37347 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37348 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37349 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37350 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37351 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37352 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37353 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37354 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37355 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37356 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37357 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37358 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37359 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37360 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37361 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37362 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37363 nargs = 4;
37364 break;
37365 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37366 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37367 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37368 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37369 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37370 nargs = 4;
37371 nargs_constant = 1;
37372 break;
37373 case QI_FTYPE_V4DI_V4DI_INT_QI:
37374 case QI_FTYPE_V8SI_V8SI_INT_QI:
37375 case QI_FTYPE_V4DF_V4DF_INT_QI:
37376 case QI_FTYPE_V8SF_V8SF_INT_QI:
37377 case QI_FTYPE_V2DI_V2DI_INT_QI:
37378 case QI_FTYPE_V4SI_V4SI_INT_QI:
37379 case QI_FTYPE_V2DF_V2DF_INT_QI:
37380 case QI_FTYPE_V4SF_V4SF_INT_QI:
37381 case DI_FTYPE_V64QI_V64QI_INT_DI:
37382 case SI_FTYPE_V32QI_V32QI_INT_SI:
37383 case HI_FTYPE_V16QI_V16QI_INT_HI:
37384 case SI_FTYPE_V32HI_V32HI_INT_SI:
37385 case HI_FTYPE_V16HI_V16HI_INT_HI:
37386 case QI_FTYPE_V8HI_V8HI_INT_QI:
37387 nargs = 4;
37388 mask_pos = 1;
37389 nargs_constant = 1;
37390 break;
37391 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37392 nargs = 4;
37393 nargs_constant = 2;
37394 break;
37395 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37396 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37397 nargs = 4;
37398 break;
37399 case QI_FTYPE_V8DI_V8DI_INT_QI:
37400 case HI_FTYPE_V16SI_V16SI_INT_HI:
37401 case QI_FTYPE_V8DF_V8DF_INT_QI:
37402 case HI_FTYPE_V16SF_V16SF_INT_HI:
37403 mask_pos = 1;
37404 nargs = 4;
37405 nargs_constant = 1;
37406 break;
37407 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37408 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37409 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37410 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37411 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37412 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37413 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37414 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37415 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37416 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37417 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37418 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37419 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37420 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37421 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37422 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37423 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37424 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37425 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37426 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37427 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37428 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37429 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37430 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37431 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37432 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37433 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37434 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37435 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37436 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37437 nargs = 4;
37438 mask_pos = 2;
37439 nargs_constant = 1;
37440 break;
37441 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37442 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37443 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37444 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37445 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37446 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37447 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37448 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37449 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37450 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37451 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37452 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37453 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37454 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37455 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37456 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37457 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37458 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37459 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37460 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37461 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37462 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37463 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37464 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37465 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37466 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37467 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37468 nargs = 5;
37469 mask_pos = 2;
37470 nargs_constant = 1;
37471 break;
37472 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37473 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37474 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37475 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37476 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37477 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37478 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37479 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37480 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37481 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37482 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37483 nargs = 5;
37485 mask_pos = 1;
37486 nargs_constant = 1;
37487 break;
37489 default:
37490 gcc_unreachable ();
37493 gcc_assert (nargs <= ARRAY_SIZE (args));
37495 if (comparison != UNKNOWN)
37497 gcc_assert (nargs == 2);
37498 return ix86_expand_sse_compare (d, exp, target, swap);
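/* Pick the destination.  TARGET is reused when it already has the insn's
   result mode and satisfies the predicate; otherwise a fresh pseudo is
   used.  When RMODE differs from TMODE, the insn writes REAL_TARGET in
   TMODE and the builtin's value is returned as an RMODE subreg of it.  */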
37501 if (rmode == VOIDmode || rmode == tmode)
37503 if (optimize
37504 || target == 0
37505 || GET_MODE (target) != tmode
37506 || !insn_p->operand[0].predicate (target, tmode))
37507 target = gen_reg_rtx (tmode);
37508 real_target = target;
37510 else
37512 real_target = gen_reg_rtx (tmode);
37513 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37516 for (i = 0; i < nargs; i++)
37518 tree arg = CALL_EXPR_ARG (exp, i);
37519 rtx op = expand_normal (arg);
37520 machine_mode mode = insn_p->operand[i + 1].mode;
37521 bool match = insn_p->operand[i + 1].predicate (op, mode);
37523 if (last_arg_count && (i + 1) == nargs)
37525 /* SIMD shift insns take either an 8-bit immediate or a
37526 register as the count.  But the builtin functions take int as
37527 the count.  If the count doesn't match, we put it in a register.  */
37528 if (!match)
37530 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37531 if (!insn_p->operand[i + 1].predicate (op, mode))
37532 op = copy_to_reg (op);
37535 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37536 (!mask_pos && (nargs - i) <= nargs_constant))
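/* This operand is one of the NARGS_CONSTANT trailing immediates (the
   MASK_POS merge/mask operands, if any, come after them).  Diagnose a
   non-immediate or out-of-range value with a per-pattern message.  */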
37538 if (!match)
37539 switch (icode)
37541 case CODE_FOR_avx_vinsertf128v4di:
37542 case CODE_FOR_avx_vextractf128v4di:
37543 error ("the last argument must be a 1-bit immediate");
37544 return const0_rtx;
37546 case CODE_FOR_avx512f_cmpv8di3_mask:
37547 case CODE_FOR_avx512f_cmpv16si3_mask:
37548 case CODE_FOR_avx512f_ucmpv8di3_mask:
37549 case CODE_FOR_avx512f_ucmpv16si3_mask:
37550 case CODE_FOR_avx512vl_cmpv4di3_mask:
37551 case CODE_FOR_avx512vl_cmpv8si3_mask:
37552 case CODE_FOR_avx512vl_ucmpv4di3_mask:
37553 case CODE_FOR_avx512vl_ucmpv8si3_mask:
37554 case CODE_FOR_avx512vl_cmpv2di3_mask:
37555 case CODE_FOR_avx512vl_cmpv4si3_mask:
37556 case CODE_FOR_avx512vl_ucmpv2di3_mask:
37557 case CODE_FOR_avx512vl_ucmpv4si3_mask:
37558 error ("the last argument must be a 3-bit immediate");
37559 return const0_rtx;
37561 case CODE_FOR_sse4_1_roundsd:
37562 case CODE_FOR_sse4_1_roundss:
37564 case CODE_FOR_sse4_1_roundpd:
37565 case CODE_FOR_sse4_1_roundps:
37566 case CODE_FOR_avx_roundpd256:
37567 case CODE_FOR_avx_roundps256:
37569 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37570 case CODE_FOR_sse4_1_roundps_sfix:
37571 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37572 case CODE_FOR_avx_roundps_sfix256:
37574 case CODE_FOR_sse4_1_blendps:
37575 case CODE_FOR_avx_blendpd256:
37576 case CODE_FOR_avx_vpermilv4df:
37577 case CODE_FOR_avx_vpermilv4df_mask:
37578 case CODE_FOR_avx512f_getmantv8df_mask:
37579 case CODE_FOR_avx512f_getmantv16sf_mask:
37580 case CODE_FOR_avx512vl_getmantv8sf_mask:
37581 case CODE_FOR_avx512vl_getmantv4df_mask:
37582 case CODE_FOR_avx512vl_getmantv4sf_mask:
37583 case CODE_FOR_avx512vl_getmantv2df_mask:
37584 case CODE_FOR_avx512dq_rangepv8df_mask_round:
37585 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37586 case CODE_FOR_avx512dq_rangepv4df_mask:
37587 case CODE_FOR_avx512dq_rangepv8sf_mask:
37588 case CODE_FOR_avx512dq_rangepv2df_mask:
37589 case CODE_FOR_avx512dq_rangepv4sf_mask:
37590 case CODE_FOR_avx_shufpd256_mask:
37591 error ("the last argument must be a 4-bit immediate");
37592 return const0_rtx;
37594 case CODE_FOR_sha1rnds4:
37595 case CODE_FOR_sse4_1_blendpd:
37596 case CODE_FOR_avx_vpermilv2df:
37597 case CODE_FOR_avx_vpermilv2df_mask:
37598 case CODE_FOR_xop_vpermil2v2df3:
37599 case CODE_FOR_xop_vpermil2v4sf3:
37600 case CODE_FOR_xop_vpermil2v4df3:
37601 case CODE_FOR_xop_vpermil2v8sf3:
37602 case CODE_FOR_avx512f_vinsertf32x4_mask:
37603 case CODE_FOR_avx512f_vinserti32x4_mask:
37604 case CODE_FOR_avx512f_vextractf32x4_mask:
37605 case CODE_FOR_avx512f_vextracti32x4_mask:
37606 case CODE_FOR_sse2_shufpd:
37607 case CODE_FOR_sse2_shufpd_mask:
37608 case CODE_FOR_avx512dq_shuf_f64x2_mask:
37609 case CODE_FOR_avx512dq_shuf_i64x2_mask:
37610 case CODE_FOR_avx512vl_shuf_i32x4_mask:
37611 case CODE_FOR_avx512vl_shuf_f32x4_mask:
37612 error ("the last argument must be a 2-bit immediate");
37613 return const0_rtx;
37615 case CODE_FOR_avx_vextractf128v4df:
37616 case CODE_FOR_avx_vextractf128v8sf:
37617 case CODE_FOR_avx_vextractf128v8si:
37618 case CODE_FOR_avx_vinsertf128v4df:
37619 case CODE_FOR_avx_vinsertf128v8sf:
37620 case CODE_FOR_avx_vinsertf128v8si:
37621 case CODE_FOR_avx512f_vinsertf64x4_mask:
37622 case CODE_FOR_avx512f_vinserti64x4_mask:
37623 case CODE_FOR_avx512f_vextractf64x4_mask:
37624 case CODE_FOR_avx512f_vextracti64x4_mask:
37625 case CODE_FOR_avx512dq_vinsertf32x8_mask:
37626 case CODE_FOR_avx512dq_vinserti32x8_mask:
37627 case CODE_FOR_avx512vl_vinsertv4df:
37628 case CODE_FOR_avx512vl_vinsertv4di:
37629 case CODE_FOR_avx512vl_vinsertv8sf:
37630 case CODE_FOR_avx512vl_vinsertv8si:
37631 error ("the last argument must be a 1-bit immediate");
37632 return const0_rtx;
37634 case CODE_FOR_avx_vmcmpv2df3:
37635 case CODE_FOR_avx_vmcmpv4sf3:
37636 case CODE_FOR_avx_cmpv2df3:
37637 case CODE_FOR_avx_cmpv4sf3:
37638 case CODE_FOR_avx_cmpv4df3:
37639 case CODE_FOR_avx_cmpv8sf3:
37640 case CODE_FOR_avx512f_cmpv8df3_mask:
37641 case CODE_FOR_avx512f_cmpv16sf3_mask:
37642 case CODE_FOR_avx512f_vmcmpv2df3_mask:
37643 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37644 error ("the last argument must be a 5-bit immediate");
37645 return const0_rtx;
37647 default:
37648 switch (nargs_constant)
37650 case 2:
37651 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37652 (!mask_pos && (nargs - i) == nargs_constant))
37654 error ("the next to last argument must be an 8-bit immediate");
37655 break;
37657 case 1:
37658 error ("the last argument must be an 8-bit immediate");
37659 break;
37660 default:
37661 gcc_unreachable ();
37663 return const0_rtx;
37666 else
37668 if (VECTOR_MODE_P (mode))
37669 op = safe_vector_operand (op, mode);
37671 /* If we aren't optimizing, only allow one memory operand to
37672 be generated. */
37673 if (memory_operand (op, mode))
37674 num_memory++;
37676 op = fixup_modeless_constant (op, mode);
37678 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37680 if (optimize || !match || num_memory > 1)
37681 op = copy_to_mode_reg (mode, op);
37683 else
37685 op = copy_to_reg (op);
37686 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37690 args[i].op = op;
37691 args[i].mode = mode;
37694 switch (nargs)
37696 case 1:
37697 pat = GEN_FCN (icode) (real_target, args[0].op);
37698 break;
37699 case 2:
37700 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37701 break;
37702 case 3:
37703 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37704 args[2].op);
37705 break;
37706 case 4:
37707 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37708 args[2].op, args[3].op);
37709 break;
37710 case 5:
37711 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37712 args[2].op, args[3].op, args[4].op);
break;
37713 case 6:
37714 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37715 args[2].op, args[3].op, args[4].op,
37716 args[5].op);
37717 break;
37718 default:
37719 gcc_unreachable ();
37722 if (! pat)
37723 return 0;
37725 emit_insn (pat);
37726 return target;
37729 /* Transform a pattern of the following layout:
37730 (parallel [
37731 set (A B)
37732 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37734 into:
37735 (set (A B))
or a pattern of the form
37738 (parallel [ A B
...
37740 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
...
])
37743 into:
37744 (parallel [ A B ... ]) */
37746 static rtx
37747 ix86_erase_embedded_rounding (rtx pat)
37749 if (GET_CODE (pat) == INSN)
37750 pat = PATTERN (pat);
37752 gcc_assert (GET_CODE (pat) == PARALLEL);
37754 if (XVECLEN (pat, 0) == 2)
37756 rtx p0 = XVECEXP (pat, 0, 0);
37757 rtx p1 = XVECEXP (pat, 0, 1);
37759 gcc_assert (GET_CODE (p0) == SET
37760 && GET_CODE (p1) == UNSPEC
37761 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37763 return p0;
37765 else
37767 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37768 int i = 0;
37769 int j = 0;
37771 for (; i < XVECLEN (pat, 0); ++i)
37773 rtx elem = XVECEXP (pat, 0, i);
37774 if (GET_CODE (elem) != UNSPEC
37775 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37776 res [j++] = elem;
37779 /* No more than 1 occurrence was removed. */
37780 gcc_assert (j >= XVECLEN (pat, 0) - 1);
37782 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37786 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
37787 with rounding. */
37788 static rtx
37789 ix86_expand_sse_comi_round (const struct builtin_description *d,
37790 tree exp, rtx target)
37792 rtx pat, set_dst;
37793 tree arg0 = CALL_EXPR_ARG (exp, 0);
37794 tree arg1 = CALL_EXPR_ARG (exp, 1);
37795 tree arg2 = CALL_EXPR_ARG (exp, 2);
37796 tree arg3 = CALL_EXPR_ARG (exp, 3);
37797 rtx op0 = expand_normal (arg0);
37798 rtx op1 = expand_normal (arg1);
37799 rtx op2 = expand_normal (arg2);
37800 rtx op3 = expand_normal (arg3);
37801 enum insn_code icode = d->icode;
37802 const struct insn_data_d *insn_p = &insn_data[icode];
37803 machine_mode mode0 = insn_p->operand[0].mode;
37804 machine_mode mode1 = insn_p->operand[1].mode;
37805 enum rtx_code comparison = UNEQ;
37806 bool need_ucomi = false;
37808 /* See avxintrin.h for values. */
37809 enum rtx_code comi_comparisons[32] =
37811 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37812 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37813 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37815 bool need_ucomi_values[32] =
37817 true, false, false, true, true, false, false, true,
37818 true, false, false, true, true, false, false, true,
37819 false, true, true, false, false, true, true, false,
37820 false, true, true, false, false, true, true, false
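/* comi_comparisons[] maps the 0..31 comparison-predicate immediates (the
   _CMP_* values documented in avxintrin.h) to RTL comparison codes, and
   need_ucomi_values[] marks the predicates that must use the non-signalling
   ucomi form of the compare rather than comi.  */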
37823 if (!CONST_INT_P (op2))
37825 error ("the third argument must be a comparison constant");
37826 return const0_rtx;
37828 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37830 error ("incorrect comparison mode");
37831 return const0_rtx;
37834 if (!insn_p->operand[2].predicate (op3, SImode))
37836 error ("incorrect rounding operand");
37837 return const0_rtx;
37840 comparison = comi_comparisons[INTVAL (op2)];
37841 need_ucomi = need_ucomi_values[INTVAL (op2)];
37843 if (VECTOR_MODE_P (mode0))
37844 op0 = safe_vector_operand (op0, mode0);
37845 if (VECTOR_MODE_P (mode1))
37846 op1 = safe_vector_operand (op1, mode1);
37848 target = gen_reg_rtx (SImode);
37849 emit_move_insn (target, const0_rtx);
37850 target = gen_rtx_SUBREG (QImode, target, 0);
37852 if ((optimize && !register_operand (op0, mode0))
37853 || !insn_p->operand[0].predicate (op0, mode0))
37854 op0 = copy_to_mode_reg (mode0, op0);
37855 if ((optimize && !register_operand (op1, mode1))
37856 || !insn_p->operand[1].predicate (op1, mode1))
37857 op1 = copy_to_mode_reg (mode1, op1);
37859 if (need_ucomi)
37860 icode = icode == CODE_FOR_sse_comi_round
37861 ? CODE_FOR_sse_ucomi_round
37862 : CODE_FOR_sse2_ucomi_round;
37864 pat = GEN_FCN (icode) (op0, op1, op3);
37865 if (! pat)
37866 return 0;
37868 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
37869 if (INTVAL (op3) == NO_ROUND)
37871 pat = ix86_erase_embedded_rounding (pat);
37872 if (! pat)
37873 return 0;
37875 set_dst = SET_DEST (pat);
37877 else
37879 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37880 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37883 emit_insn (pat);
37884 emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37885 gen_rtx_fmt_ee (comparison, QImode,
37886 set_dst,
37887 const0_rtx)));
37889 return SUBREG_REG (target);
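/* Subroutine of ix86_expand_builtin to take care of insns with embedded
   rounding: the last argument is the rounding immediate, which is either
   passed through to the *_round pattern or, when it is NO_ROUND, stripped
   again by ix86_erase_embedded_rounding.  */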
37892 static rtx
37893 ix86_expand_round_builtin (const struct builtin_description *d,
37894 tree exp, rtx target)
37896 rtx pat;
37897 unsigned int i, nargs;
37898 struct
37900 rtx op;
37901 machine_mode mode;
37902 } args[6];
37903 enum insn_code icode = d->icode;
37904 const struct insn_data_d *insn_p = &insn_data[icode];
37905 machine_mode tmode = insn_p->operand[0].mode;
37906 unsigned int nargs_constant = 0;
37907 unsigned int redundant_embed_rnd = 0;
37909 switch ((enum ix86_builtin_func_type) d->flag)
37911 case UINT64_FTYPE_V2DF_INT:
37912 case UINT64_FTYPE_V4SF_INT:
37913 case UINT_FTYPE_V2DF_INT:
37914 case UINT_FTYPE_V4SF_INT:
37915 case INT64_FTYPE_V2DF_INT:
37916 case INT64_FTYPE_V4SF_INT:
37917 case INT_FTYPE_V2DF_INT:
37918 case INT_FTYPE_V4SF_INT:
37919 nargs = 2;
37920 break;
37921 case V4SF_FTYPE_V4SF_UINT_INT:
37922 case V4SF_FTYPE_V4SF_UINT64_INT:
37923 case V2DF_FTYPE_V2DF_UINT64_INT:
37924 case V4SF_FTYPE_V4SF_INT_INT:
37925 case V4SF_FTYPE_V4SF_INT64_INT:
37926 case V2DF_FTYPE_V2DF_INT64_INT:
37927 case V4SF_FTYPE_V4SF_V4SF_INT:
37928 case V2DF_FTYPE_V2DF_V2DF_INT:
37929 case V4SF_FTYPE_V4SF_V2DF_INT:
37930 case V2DF_FTYPE_V2DF_V4SF_INT:
37931 nargs = 3;
37932 break;
37933 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37934 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37935 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37936 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37937 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37938 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37939 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37940 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37941 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37942 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37943 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37944 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37945 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37946 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37947 nargs = 4;
37948 break;
37949 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37950 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37951 nargs_constant = 2;
37952 nargs = 4;
37953 break;
37954 case INT_FTYPE_V4SF_V4SF_INT_INT:
37955 case INT_FTYPE_V2DF_V2DF_INT_INT:
37956 return ix86_expand_sse_comi_round (d, exp, target);
37957 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37958 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37959 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37960 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
37961 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
37962 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
37963 nargs = 5;
37964 break;
37965 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
37966 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
37967 nargs_constant = 4;
37968 nargs = 5;
37969 break;
37970 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
37971 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
37972 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
37973 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
37974 nargs_constant = 3;
37975 nargs = 5;
37976 break;
37977 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
37978 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
37979 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
37980 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
37981 nargs = 6;
37982 nargs_constant = 4;
37983 break;
37984 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37985 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37986 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37987 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37988 nargs = 6;
37989 nargs_constant = 3;
37990 break;
37991 default:
37992 gcc_unreachable ();
37994 gcc_assert (nargs <= ARRAY_SIZE (args));
37996 if (optimize
37997 || target == 0
37998 || GET_MODE (target) != tmode
37999 || !insn_p->operand[0].predicate (target, tmode))
38000 target = gen_reg_rtx (tmode);
38002 for (i = 0; i < nargs; i++)
38004 tree arg = CALL_EXPR_ARG (exp, i);
38005 rtx op = expand_normal (arg);
38006 machine_mode mode = insn_p->operand[i + 1].mode;
38007 bool match = insn_p->operand[i + 1].predicate (op, mode);
38009 if (i == nargs - nargs_constant)
38011 if (!match)
38013 switch (icode)
38015 case CODE_FOR_avx512f_getmantv8df_mask_round:
38016 case CODE_FOR_avx512f_getmantv16sf_mask_round:
38017 case CODE_FOR_avx512f_vgetmantv2df_round:
38018 case CODE_FOR_avx512f_vgetmantv4sf_round:
38019 error ("the immediate argument must be a 4-bit immediate");
38020 return const0_rtx;
38021 case CODE_FOR_avx512f_cmpv8df3_mask_round:
38022 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
38023 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
38024 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
38025 error ("the immediate argument must be a 5-bit immediate");
38026 return const0_rtx;
38027 default:
38028 error ("the immediate argument must be an 8-bit immediate");
38029 return const0_rtx;
38033 else if (i == nargs-1)
38035 if (!insn_p->operand[nargs].predicate (op, SImode))
38037 error ("incorrect rounding operand");
38038 return const0_rtx;
38042 /* If there is no rounding, use the normal version of the pattern. */
38042 if (INTVAL (op) == NO_ROUND)
38043 redundant_embed_rnd = 1;
38045 else
38047 if (VECTOR_MODE_P (mode))
38048 op = safe_vector_operand (op, mode);
38050 op = fixup_modeless_constant (op, mode);
38052 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38054 if (optimize || !match)
38055 op = copy_to_mode_reg (mode, op);
38057 else
38059 op = copy_to_reg (op);
38060 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38064 args[i].op = op;
38065 args[i].mode = mode;
38068 switch (nargs)
38070 case 1:
38071 pat = GEN_FCN (icode) (target, args[0].op);
38072 break;
38073 case 2:
38074 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38075 break;
38076 case 3:
38077 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38078 args[2].op);
38079 break;
38080 case 4:
38081 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38082 args[2].op, args[3].op);
38083 break;
38084 case 5:
38085 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38086 args[2].op, args[3].op, args[4].op);
break;
38087 case 6:
38088 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38089 args[2].op, args[3].op, args[4].op,
38090 args[5].op);
38091 break;
38092 default:
38093 gcc_unreachable ();
38096 if (!pat)
38097 return 0;
38099 if (redundant_embed_rnd)
38100 pat = ix86_erase_embedded_rounding (pat);
38102 emit_insn (pat);
38103 return target;
38106 /* Subroutine of ix86_expand_builtin to take care of special insns
38107 with a variable number of operands. */
38109 static rtx
38110 ix86_expand_special_args_builtin (const struct builtin_description *d,
38111 tree exp, rtx target)
38113 tree arg;
38114 rtx pat, op;
38115 unsigned int i, nargs, arg_adjust, memory;
38116 bool aligned_mem = false;
38117 struct
38119 rtx op;
38120 machine_mode mode;
38121 } args[3];
38122 enum insn_code icode = d->icode;
38123 bool last_arg_constant = false;
38124 const struct insn_data_d *insn_p = &insn_data[icode];
38125 machine_mode tmode = insn_p->operand[0].mode;
38126 enum { load, store } klass;
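/* KLASS says whether the insn reads from or writes to memory.  MEMORY
   indexes the pointer argument (if any) that becomes the MEM operand;
   stores set it to ARRAY_SIZE (args) because there the MEM is the insn's
   destination rather than one of the arguments.  */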
38128 switch ((enum ix86_builtin_func_type) d->flag)
38130 case VOID_FTYPE_VOID:
38131 emit_insn (GEN_FCN (icode) (target));
38132 return 0;
38133 case VOID_FTYPE_UINT64:
38134 case VOID_FTYPE_UNSIGNED:
38135 nargs = 0;
38136 klass = store;
38137 memory = 0;
38138 break;
38140 case INT_FTYPE_VOID:
38141 case USHORT_FTYPE_VOID:
38142 case UINT64_FTYPE_VOID:
38143 case UNSIGNED_FTYPE_VOID:
38144 nargs = 0;
38145 klass = load;
38146 memory = 0;
38147 break;
38148 case UINT64_FTYPE_PUNSIGNED:
38149 case V2DI_FTYPE_PV2DI:
38150 case V4DI_FTYPE_PV4DI:
38151 case V32QI_FTYPE_PCCHAR:
38152 case V16QI_FTYPE_PCCHAR:
38153 case V8SF_FTYPE_PCV4SF:
38154 case V8SF_FTYPE_PCFLOAT:
38155 case V4SF_FTYPE_PCFLOAT:
38156 case V4DF_FTYPE_PCV2DF:
38157 case V4DF_FTYPE_PCDOUBLE:
38158 case V2DF_FTYPE_PCDOUBLE:
38159 case VOID_FTYPE_PVOID:
38160 case V16SI_FTYPE_PV4SI:
38161 case V16SF_FTYPE_PV4SF:
38162 case V8DI_FTYPE_PV4DI:
38163 case V8DI_FTYPE_PV8DI:
38164 case V8DF_FTYPE_PV4DF:
38165 nargs = 1;
38166 klass = load;
38167 memory = 0;
38168 switch (icode)
38170 case CODE_FOR_sse4_1_movntdqa:
38171 case CODE_FOR_avx2_movntdqa:
38172 case CODE_FOR_avx512f_movntdqa:
38173 aligned_mem = true;
38174 break;
38175 default:
38176 break;
38178 break;
38179 case VOID_FTYPE_PV2SF_V4SF:
38180 case VOID_FTYPE_PV8DI_V8DI:
38181 case VOID_FTYPE_PV4DI_V4DI:
38182 case VOID_FTYPE_PV2DI_V2DI:
38183 case VOID_FTYPE_PCHAR_V32QI:
38184 case VOID_FTYPE_PCHAR_V16QI:
38185 case VOID_FTYPE_PFLOAT_V16SF:
38186 case VOID_FTYPE_PFLOAT_V8SF:
38187 case VOID_FTYPE_PFLOAT_V4SF:
38188 case VOID_FTYPE_PDOUBLE_V8DF:
38189 case VOID_FTYPE_PDOUBLE_V4DF:
38190 case VOID_FTYPE_PDOUBLE_V2DF:
38191 case VOID_FTYPE_PLONGLONG_LONGLONG:
38192 case VOID_FTYPE_PULONGLONG_ULONGLONG:
38193 case VOID_FTYPE_PINT_INT:
38194 nargs = 1;
38195 klass = store;
38196 /* Reserve memory operand for target. */
38197 memory = ARRAY_SIZE (args);
38198 switch (icode)
38200 /* These builtins and instructions require the memory
38201 to be properly aligned. */
38202 case CODE_FOR_avx_movntv4di:
38203 case CODE_FOR_sse2_movntv2di:
38204 case CODE_FOR_avx_movntv8sf:
38205 case CODE_FOR_sse_movntv4sf:
38206 case CODE_FOR_sse4a_vmmovntv4sf:
38207 case CODE_FOR_avx_movntv4df:
38208 case CODE_FOR_sse2_movntv2df:
38209 case CODE_FOR_sse4a_vmmovntv2df:
38210 case CODE_FOR_sse2_movntidi:
38211 case CODE_FOR_sse_movntq:
38212 case CODE_FOR_sse2_movntisi:
38213 case CODE_FOR_avx512f_movntv16sf:
38214 case CODE_FOR_avx512f_movntv8df:
38215 case CODE_FOR_avx512f_movntv8di:
38216 aligned_mem = true;
38217 break;
38218 default:
38219 break;
38221 break;
38222 case V4SF_FTYPE_V4SF_PCV2SF:
38223 case V2DF_FTYPE_V2DF_PCDOUBLE:
38224 nargs = 2;
38225 klass = load;
38226 memory = 1;
38227 break;
38228 case V8SF_FTYPE_PCV8SF_V8SI:
38229 case V4DF_FTYPE_PCV4DF_V4DI:
38230 case V4SF_FTYPE_PCV4SF_V4SI:
38231 case V2DF_FTYPE_PCV2DF_V2DI:
38232 case V8SI_FTYPE_PCV8SI_V8SI:
38233 case V4DI_FTYPE_PCV4DI_V4DI:
38234 case V4SI_FTYPE_PCV4SI_V4SI:
38235 case V2DI_FTYPE_PCV2DI_V2DI:
38236 nargs = 2;
38237 klass = load;
38238 memory = 0;
38239 break;
38240 case VOID_FTYPE_PV8DF_V8DF_QI:
38241 case VOID_FTYPE_PV16SF_V16SF_HI:
38242 case VOID_FTYPE_PV8DI_V8DI_QI:
38243 case VOID_FTYPE_PV4DI_V4DI_QI:
38244 case VOID_FTYPE_PV2DI_V2DI_QI:
38245 case VOID_FTYPE_PV16SI_V16SI_HI:
38246 case VOID_FTYPE_PV8SI_V8SI_QI:
38247 case VOID_FTYPE_PV4SI_V4SI_QI:
38248 switch (icode)
38250 /* These builtins and instructions require the memory
38251 to be properly aligned. */
38252 case CODE_FOR_avx512f_storev16sf_mask:
38253 case CODE_FOR_avx512f_storev16si_mask:
38254 case CODE_FOR_avx512f_storev8df_mask:
38255 case CODE_FOR_avx512f_storev8di_mask:
38256 case CODE_FOR_avx512vl_storev8sf_mask:
38257 case CODE_FOR_avx512vl_storev8si_mask:
38258 case CODE_FOR_avx512vl_storev4df_mask:
38259 case CODE_FOR_avx512vl_storev4di_mask:
38260 case CODE_FOR_avx512vl_storev4sf_mask:
38261 case CODE_FOR_avx512vl_storev4si_mask:
38262 case CODE_FOR_avx512vl_storev2df_mask:
38263 case CODE_FOR_avx512vl_storev2di_mask:
38264 aligned_mem = true;
38265 break;
38266 default:
38267 break;
38269 /* FALLTHRU */
38270 case VOID_FTYPE_PV8SF_V8SI_V8SF:
38271 case VOID_FTYPE_PV4DF_V4DI_V4DF:
38272 case VOID_FTYPE_PV4SF_V4SI_V4SF:
38273 case VOID_FTYPE_PV2DF_V2DI_V2DF:
38274 case VOID_FTYPE_PV8SI_V8SI_V8SI:
38275 case VOID_FTYPE_PV4DI_V4DI_V4DI:
38276 case VOID_FTYPE_PV4SI_V4SI_V4SI:
38277 case VOID_FTYPE_PV2DI_V2DI_V2DI:
38278 case VOID_FTYPE_PDOUBLE_V2DF_QI:
38279 case VOID_FTYPE_PFLOAT_V4SF_QI:
38280 case VOID_FTYPE_PV8SI_V8DI_QI:
38281 case VOID_FTYPE_PV8HI_V8DI_QI:
38282 case VOID_FTYPE_PV16HI_V16SI_HI:
38283 case VOID_FTYPE_PV16QI_V8DI_QI:
38284 case VOID_FTYPE_PV16QI_V16SI_HI:
38285 case VOID_FTYPE_PV4SI_V4DI_QI:
38286 case VOID_FTYPE_PV4SI_V2DI_QI:
38287 case VOID_FTYPE_PV8HI_V4DI_QI:
38288 case VOID_FTYPE_PV8HI_V2DI_QI:
38289 case VOID_FTYPE_PV8HI_V8SI_QI:
38290 case VOID_FTYPE_PV8HI_V4SI_QI:
38291 case VOID_FTYPE_PV16QI_V4DI_QI:
38292 case VOID_FTYPE_PV16QI_V2DI_QI:
38293 case VOID_FTYPE_PV16QI_V8SI_QI:
38294 case VOID_FTYPE_PV16QI_V4SI_QI:
38295 case VOID_FTYPE_PV8HI_V8HI_QI:
38296 case VOID_FTYPE_PV16HI_V16HI_HI:
38297 case VOID_FTYPE_PV32HI_V32HI_SI:
38298 case VOID_FTYPE_PV16QI_V16QI_HI:
38299 case VOID_FTYPE_PV32QI_V32QI_SI:
38300 case VOID_FTYPE_PV64QI_V64QI_DI:
38301 case VOID_FTYPE_PV4DF_V4DF_QI:
38302 case VOID_FTYPE_PV2DF_V2DF_QI:
38303 case VOID_FTYPE_PV8SF_V8SF_QI:
38304 case VOID_FTYPE_PV4SF_V4SF_QI:
38305 nargs = 2;
38306 klass = store;
38307 /* Reserve memory operand for target. */
38308 memory = ARRAY_SIZE (args);
38309 break;
38310 case V4SF_FTYPE_PCV4SF_V4SF_QI:
38311 case V8SF_FTYPE_PCV8SF_V8SF_QI:
38312 case V16SF_FTYPE_PCV16SF_V16SF_HI:
38313 case V4SI_FTYPE_PCV4SI_V4SI_QI:
38314 case V8SI_FTYPE_PCV8SI_V8SI_QI:
38315 case V16SI_FTYPE_PCV16SI_V16SI_HI:
38316 case V2DF_FTYPE_PCV2DF_V2DF_QI:
38317 case V4DF_FTYPE_PCV4DF_V4DF_QI:
38318 case V8DF_FTYPE_PCV8DF_V8DF_QI:
38319 case V2DI_FTYPE_PCV2DI_V2DI_QI:
38320 case V4DI_FTYPE_PCV4DI_V4DI_QI:
38321 case V8DI_FTYPE_PCV8DI_V8DI_QI:
38322 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38323 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38324 case V8HI_FTYPE_PCV8HI_V8HI_QI:
38325 case V16HI_FTYPE_PCV16HI_V16HI_HI:
38326 case V32HI_FTYPE_PCV32HI_V32HI_SI:
38327 case V16QI_FTYPE_PCV16QI_V16QI_HI:
38328 case V32QI_FTYPE_PCV32QI_V32QI_SI:
38329 case V64QI_FTYPE_PCV64QI_V64QI_DI:
38330 nargs = 3;
38331 klass = load;
38332 memory = 0;
38333 switch (icode)
38335 /* These builtins and instructions require the memory
38336 to be properly aligned. */
38337 case CODE_FOR_avx512f_loadv16sf_mask:
38338 case CODE_FOR_avx512f_loadv16si_mask:
38339 case CODE_FOR_avx512f_loadv8df_mask:
38340 case CODE_FOR_avx512f_loadv8di_mask:
38341 case CODE_FOR_avx512vl_loadv8sf_mask:
38342 case CODE_FOR_avx512vl_loadv8si_mask:
38343 case CODE_FOR_avx512vl_loadv4df_mask:
38344 case CODE_FOR_avx512vl_loadv4di_mask:
38345 case CODE_FOR_avx512vl_loadv4sf_mask:
38346 case CODE_FOR_avx512vl_loadv4si_mask:
38347 case CODE_FOR_avx512vl_loadv2df_mask:
38348 case CODE_FOR_avx512vl_loadv2di_mask:
38349 case CODE_FOR_avx512bw_loadv64qi_mask:
38350 case CODE_FOR_avx512vl_loadv32qi_mask:
38351 case CODE_FOR_avx512vl_loadv16qi_mask:
38352 case CODE_FOR_avx512bw_loadv32hi_mask:
38353 case CODE_FOR_avx512vl_loadv16hi_mask:
38354 case CODE_FOR_avx512vl_loadv8hi_mask:
38355 aligned_mem = true;
38356 break;
38357 default:
38358 break;
38360 break;
38361 case VOID_FTYPE_UINT_UINT_UINT:
38362 case VOID_FTYPE_UINT64_UINT_UINT:
38363 case UCHAR_FTYPE_UINT_UINT_UINT:
38364 case UCHAR_FTYPE_UINT64_UINT_UINT:
38365 nargs = 3;
38366 klass = load;
38367 memory = ARRAY_SIZE (args);
38368 last_arg_constant = true;
38369 break;
38370 default:
38371 gcc_unreachable ();
38374 gcc_assert (nargs <= ARRAY_SIZE (args));
38376 if (klass == store)
38378 arg = CALL_EXPR_ARG (exp, 0);
38379 op = expand_normal (arg);
38380 gcc_assert (target == 0);
38381 if (memory)
38383 op = ix86_zero_extend_to_Pmode (op);
38384 target = gen_rtx_MEM (tmode, op);
38385 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38386 on it. Try to improve it using get_pointer_alignment,
38387 and if the special builtin is one that requires strict
38388 mode alignment, also from its GET_MODE_ALIGNMENT.
38389 Failure to do so could lead to ix86_legitimate_combined_insn
38390 rejecting all changes to such insns. */
38391 unsigned int align = get_pointer_alignment (arg);
38392 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38393 align = GET_MODE_ALIGNMENT (tmode);
38394 if (MEM_ALIGN (target) < align)
38395 set_mem_align (target, align);
38397 else
38398 target = force_reg (tmode, op);
38399 arg_adjust = 1;
38401 else
38403 arg_adjust = 0;
38404 if (optimize
38405 || target == 0
38406 || !register_operand (target, tmode)
38407 || GET_MODE (target) != tmode)
38408 target = gen_reg_rtx (tmode);
38411 for (i = 0; i < nargs; i++)
38413 machine_mode mode = insn_p->operand[i + 1].mode;
38414 bool match;
38416 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38417 op = expand_normal (arg);
38418 match = insn_p->operand[i + 1].predicate (op, mode);
38420 if (last_arg_constant && (i + 1) == nargs)
38422 if (!match)
38424 if (icode == CODE_FOR_lwp_lwpvalsi3
38425 || icode == CODE_FOR_lwp_lwpinssi3
38426 || icode == CODE_FOR_lwp_lwpvaldi3
38427 || icode == CODE_FOR_lwp_lwpinsdi3)
38428 error ("the last argument must be a 32-bit immediate");
38429 else
38430 error ("the last argument must be an 8-bit immediate");
38431 return const0_rtx;
38434 else
38436 if (i == memory)
38438 /* This must be the memory operand. */
38439 op = ix86_zero_extend_to_Pmode (op);
38440 op = gen_rtx_MEM (mode, op);
38441 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38442 on it. Try to improve it using get_pointer_alignment,
38443 and if the special builtin is one that requires strict
38444 mode alignment, also from its GET_MODE_ALIGNMENT.
38445 Failure to do so could lead to ix86_legitimate_combined_insn
38446 rejecting all changes to such insns. */
38447 unsigned int align = get_pointer_alignment (arg);
38448 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38449 align = GET_MODE_ALIGNMENT (mode);
38450 if (MEM_ALIGN (op) < align)
38451 set_mem_align (op, align);
38453 else
38455 /* This must be a register. */
38456 if (VECTOR_MODE_P (mode))
38457 op = safe_vector_operand (op, mode);
38459 op = fixup_modeless_constant (op, mode);
38461 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38462 op = copy_to_mode_reg (mode, op);
38463 else
38465 op = copy_to_reg (op);
38466 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38471 args[i].op = op;
38472 args[i].mode = mode;
38475 switch (nargs)
38477 case 0:
38478 pat = GEN_FCN (icode) (target);
38479 break;
38480 case 1:
38481 pat = GEN_FCN (icode) (target, args[0].op);
38482 break;
38483 case 2:
38484 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38485 break;
38486 case 3:
38487 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38488 break;
38489 default:
38490 gcc_unreachable ();
38493 if (! pat)
38494 return 0;
38495 emit_insn (pat);
38496 return klass == store ? 0 : target;
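/* An illustrative sketch, assuming the usual <xmmintrin.h> mapping of
   _mm_stream_ps to __builtin_ia32_movntps (VOID_FTYPE_PFLOAT_V4SF,
   klass == store, icode CODE_FOR_sse_movntv4sf in the aligned_mem list):

     #include <xmmintrin.h>

     void
     stream_store (float *p, __m128 v)
     {
       _mm_stream_ps (p, v);
     }

   The generated MEM for P is given GET_MODE_ALIGNMENT (V4SFmode), so
   later passes know the non-temporal store requires 16-byte alignment.  */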
38499 /* Return the integer constant in ARG. Constrain it to be in the range
38500 of the subparts of VEC_TYPE; issue an error if not. */
38502 static int
38503 get_element_number (tree vec_type, tree arg)
38505 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38507 if (!tree_fits_uhwi_p (arg)
38508 || (elt = tree_to_uhwi (arg), elt > max))
38510 error ("selector must be an integer constant in the range 0..%wi", max);
38511 return 0;
38514 return elt;
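/* An illustrative example of the diagnostic, assuming a V4SF argument
   (TYPE_VECTOR_SUBPARTS == 4, so MAX == 3):

     typedef float v4sf __attribute__ ((vector_size (16)));

     float
     bad_extract (v4sf v)
     {
       return __builtin_ia32_vec_ext_v4sf (v, 4);
     }

   The selector 4 is out of range, so the error
   "selector must be an integer constant in the range 0..3" is emitted
   and element 0 is used instead.  */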
38517 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38518 ix86_expand_vector_init. We DO have language-level syntax for this, in
38519 the form of (type){ init-list }. Except that since we can't place emms
38520 instructions from inside the compiler, we can't allow the use of MMX
38521 registers unless the user explicitly asks for it. So we do *not* define
38522 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
38523 we have builtins invoked by mmintrin.h that give us license to emit
38524 these sorts of instructions. */
38526 static rtx
38527 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38529 machine_mode tmode = TYPE_MODE (type);
38530 machine_mode inner_mode = GET_MODE_INNER (tmode);
38531 int i, n_elt = GET_MODE_NUNITS (tmode);
38532 rtvec v = rtvec_alloc (n_elt);
38534 gcc_assert (VECTOR_MODE_P (tmode));
38535 gcc_assert (call_expr_nargs (exp) == n_elt);
38537 for (i = 0; i < n_elt; ++i)
38539 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38540 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38543 if (!target || !register_operand (target, tmode))
38544 target = gen_reg_rtx (tmode);
38546 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38547 return target;
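/* An illustrative sketch, assuming the usual <mmintrin.h> definition of
   _mm_set_pi32 in terms of __builtin_ia32_vec_init_v2si:

     #include <mmintrin.h>

     __m64
     make_v2si (int lo, int hi)
     {
       return _mm_set_pi32 (hi, lo);
     }

   The two scalar arguments are expanded, lowered to SImode and handed
   to ix86_expand_vector_init as a PARALLEL.  */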
38550 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38551 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
38552 had a language-level syntax for referencing vector elements. */
38554 static rtx
38555 ix86_expand_vec_ext_builtin (tree exp, rtx target)
38557 machine_mode tmode, mode0;
38558 tree arg0, arg1;
38559 int elt;
38560 rtx op0;
38562 arg0 = CALL_EXPR_ARG (exp, 0);
38563 arg1 = CALL_EXPR_ARG (exp, 1);
38565 op0 = expand_normal (arg0);
38566 elt = get_element_number (TREE_TYPE (arg0), arg1);
38568 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38569 mode0 = TYPE_MODE (TREE_TYPE (arg0));
38570 gcc_assert (VECTOR_MODE_P (mode0));
38572 op0 = force_reg (mode0, op0);
38574 if (optimize || !target || !register_operand (target, tmode))
38575 target = gen_reg_rtx (tmode);
38577 ix86_expand_vector_extract (true, target, op0, elt);
38579 return target;
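/* An illustrative sketch, assuming the usual <xmmintrin.h> definition of
   _mm_extract_pi16 in terms of __builtin_ia32_vec_ext_v4hi:

     #include <xmmintrin.h>

     int
     third_element (__m64 a)
     {
       return _mm_extract_pi16 (a, 2);
     }

   get_element_number checks the selector against the V4HI subparts and
   ix86_expand_vector_extract emits the actual extraction.  */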
38582 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
38583 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
38584 a language-level syntax for referencing vector elements. */
38586 static rtx
38587 ix86_expand_vec_set_builtin (tree exp)
38589 machine_mode tmode, mode1;
38590 tree arg0, arg1, arg2;
38591 int elt;
38592 rtx op0, op1, target;
38594 arg0 = CALL_EXPR_ARG (exp, 0);
38595 arg1 = CALL_EXPR_ARG (exp, 1);
38596 arg2 = CALL_EXPR_ARG (exp, 2);
38598 tmode = TYPE_MODE (TREE_TYPE (arg0));
38599 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38600 gcc_assert (VECTOR_MODE_P (tmode));
38602 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38603 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38604 elt = get_element_number (TREE_TYPE (arg0), arg2);
38606 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38607 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38609 op0 = force_reg (tmode, op0);
38610 op1 = force_reg (mode1, op1);
38612 /* OP0 is the source of these builtin functions and shouldn't be
38613 modified. Create a copy, use it and return it as target. */
38614 target = gen_reg_rtx (tmode);
38615 emit_move_insn (target, op0);
38616 ix86_expand_vector_set (true, target, op1, elt);
38618 return target;
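/* An illustrative sketch, assuming the usual <xmmintrin.h> definition of
   _mm_insert_pi16 in terms of __builtin_ia32_vec_set_v4hi:

     #include <xmmintrin.h>

     __m64
     set_third_element (__m64 a, int d)
     {
       return _mm_insert_pi16 (a, d, 2);
     }

   Because the expander works on a copy of OP0, the caller's vector A is
   left unchanged and the modified copy is returned.  */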
38621 /* Emit conditional move of SRC to DST with condition
38622 OP1 CODE OP2. */
38623 static void
38624 ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38626 rtx t;
38628 if (TARGET_CMOVE)
38630 t = ix86_expand_compare (code, op1, op2);
38631 emit_insn (gen_rtx_SET (dst, gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38632 src, dst)));
38634 else
38636 rtx_code_label *nomove = gen_label_rtx ();
38637 emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38638 const0_rtx, GET_MODE (op1), 1, nomove);
38639 emit_move_insn (dst, src);
38640 emit_label (nomove);
38644 /* Choose the max of DST and SRC and put it in DST. */
38645 static void
38646 ix86_emit_move_max (rtx dst, rtx src)
38648 ix86_emit_cmove (dst, src, LTU, dst, src);
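/* In C terms the operation above is the unsigned maximum of two
   pointer-sized values, roughly:

     dst = (dst < src) ? src : dst;   (unsigned comparison, LTU)

   Since MPX upper bounds are kept in one's complement form, taking the
   maximum is the right update for both the lower and the upper bound
   computations below.  */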
38651 /* Expand an expression EXP that calls a built-in function,
38652 with result going to TARGET if that's convenient
38653 (and in mode MODE if that's convenient).
38654 SUBTARGET may be used as the target for computing one of EXP's operands.
38655 IGNORE is nonzero if the value is to be ignored. */
38657 static rtx
38658 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38659 machine_mode mode, int ignore)
38661 const struct builtin_description *d;
38662 size_t i;
38663 enum insn_code icode;
38664 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38665 tree arg0, arg1, arg2, arg3, arg4;
38666 rtx op0, op1, op2, op3, op4, pat, insn;
38667 machine_mode mode0, mode1, mode2, mode3, mode4;
38668 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38670 /* For CPU builtins that can be folded, fold first and expand the fold. */
38671 switch (fcode)
38673 case IX86_BUILTIN_CPU_INIT:
38675 /* Make it call __cpu_indicator_init in libgcc. */
38676 tree call_expr, fndecl, type;
38677 type = build_function_type_list (integer_type_node, NULL_TREE);
38678 fndecl = build_fn_decl ("__cpu_indicator_init", type);
38679 call_expr = build_call_expr (fndecl, 0);
38680 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38682 case IX86_BUILTIN_CPU_IS:
38683 case IX86_BUILTIN_CPU_SUPPORTS:
38685 tree arg0 = CALL_EXPR_ARG (exp, 0);
38686 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38687 gcc_assert (fold_expr != NULL_TREE);
38688 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38692 /* Determine whether the builtin function is available under the current ISA.
38693 Originally the builtin was not created if it wasn't applicable to the
38694 current ISA based on the command line switches. With function specific
38695 options, we need to check in the context of the function making the call
38696 whether it is supported. */
38697 if (ix86_builtins_isa[fcode].isa
38698 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38700 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38701 NULL, (enum fpmath_unit) 0, false);
38703 if (!opts)
38704 error ("%qE needs unknown isa option", fndecl);
38705 else
38707 gcc_assert (opts != NULL);
38708 error ("%qE needs isa option %s", fndecl, opts);
38709 free (opts);
38711 return const0_rtx;
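/* An illustrative example, assuming the option string produced for the
   AVX mask is "-mavx": calling an AVX builtin from a function compiled
   without AVX support now reaches this check instead of silently
   expanding:

     typedef double v4df __attribute__ ((vector_size (32)));

     v4df
     add256 (v4df a, v4df b)
     {
       return __builtin_ia32_addpd256 (a, b);
     }

   Without -mavx (or target("avx")) this is rejected with
   "__builtin_ia32_addpd256 needs isa option -mavx".  */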
38714 switch (fcode)
38716 case IX86_BUILTIN_BNDMK:
38717 if (!target
38718 || GET_MODE (target) != BNDmode
38719 || !register_operand (target, BNDmode))
38720 target = gen_reg_rtx (BNDmode);
38722 arg0 = CALL_EXPR_ARG (exp, 0);
38723 arg1 = CALL_EXPR_ARG (exp, 1);
38725 op0 = expand_normal (arg0);
38726 op1 = expand_normal (arg1);
38728 if (!register_operand (op0, Pmode))
38729 op0 = ix86_zero_extend_to_Pmode (op0);
38730 if (!register_operand (op1, Pmode))
38731 op1 = ix86_zero_extend_to_Pmode (op1);
38733 /* Builtin arg1 is the size of the block, but instruction op1 should
38734 be (size - 1). */
38735 op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38736 NULL_RTX, 1, OPTAB_DIRECT);
38738 emit_insn (BNDmode == BND64mode
38739 ? gen_bnd64_mk (target, op0, op1)
38740 : gen_bnd32_mk (target, op0, op1));
38741 return target;
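/* An illustrative sketch, assuming the user-level checker builtin
   __builtin___bnd_set_ptr_bounds is lowered to IX86_BUILTIN_BNDMK when
   compiling with -mmpx -fcheck-pointer-bounds:

     void *
     narrow16 (void *p)
     {
       return __builtin___bnd_set_ptr_bounds (p, 16);
     }

   The size argument 16 is converted to 15 above because BNDMK takes
   the offset of the last valid byte, i.e. bounds [p, p + 15].  */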
38743 case IX86_BUILTIN_BNDSTX:
38744 arg0 = CALL_EXPR_ARG (exp, 0);
38745 arg1 = CALL_EXPR_ARG (exp, 1);
38746 arg2 = CALL_EXPR_ARG (exp, 2);
38748 op0 = expand_normal (arg0);
38749 op1 = expand_normal (arg1);
38750 op2 = expand_normal (arg2);
38752 if (!register_operand (op0, Pmode))
38753 op0 = ix86_zero_extend_to_Pmode (op0);
38754 if (!register_operand (op1, BNDmode))
38755 op1 = copy_to_mode_reg (BNDmode, op1);
38756 if (!register_operand (op2, Pmode))
38757 op2 = ix86_zero_extend_to_Pmode (op2);
38759 emit_insn (BNDmode == BND64mode
38760 ? gen_bnd64_stx (op2, op0, op1)
38761 : gen_bnd32_stx (op2, op0, op1));
38762 return 0;
38764 case IX86_BUILTIN_BNDLDX:
38765 if (!target
38766 || GET_MODE (target) != BNDmode
38767 || !register_operand (target, BNDmode))
38768 target = gen_reg_rtx (BNDmode);
38770 arg0 = CALL_EXPR_ARG (exp, 0);
38771 arg1 = CALL_EXPR_ARG (exp, 1);
38773 op0 = expand_normal (arg0);
38774 op1 = expand_normal (arg1);
38776 if (!register_operand (op0, Pmode))
38777 op0 = ix86_zero_extend_to_Pmode (op0);
38778 if (!register_operand (op1, Pmode))
38779 op1 = ix86_zero_extend_to_Pmode (op1);
38781 emit_insn (BNDmode == BND64mode
38782 ? gen_bnd64_ldx (target, op0, op1)
38783 : gen_bnd32_ldx (target, op0, op1));
38784 return target;
38786 case IX86_BUILTIN_BNDCL:
38787 arg0 = CALL_EXPR_ARG (exp, 0);
38788 arg1 = CALL_EXPR_ARG (exp, 1);
38790 op0 = expand_normal (arg0);
38791 op1 = expand_normal (arg1);
38793 if (!register_operand (op0, Pmode))
38794 op0 = ix86_zero_extend_to_Pmode (op0);
38795 if (!register_operand (op1, BNDmode))
38796 op1 = copy_to_mode_reg (BNDmode, op1);
38798 emit_insn (BNDmode == BND64mode
38799 ? gen_bnd64_cl (op1, op0)
38800 : gen_bnd32_cl (op1, op0));
38801 return 0;
38803 case IX86_BUILTIN_BNDCU:
38804 arg0 = CALL_EXPR_ARG (exp, 0);
38805 arg1 = CALL_EXPR_ARG (exp, 1);
38807 op0 = expand_normal (arg0);
38808 op1 = expand_normal (arg1);
38810 if (!register_operand (op0, Pmode))
38811 op0 = ix86_zero_extend_to_Pmode (op0);
38812 if (!register_operand (op1, BNDmode))
38813 op1 = copy_to_mode_reg (BNDmode, op1);
38815 emit_insn (BNDmode == BND64mode
38816 ? gen_bnd64_cu (op1, op0)
38817 : gen_bnd32_cu (op1, op0));
38818 return 0;
38820 case IX86_BUILTIN_BNDRET:
38821 arg0 = CALL_EXPR_ARG (exp, 0);
38822 gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38823 target = chkp_get_rtl_bounds (arg0);
38825 /* If no bounds were specified for the returned value,
38826 then use INIT bounds. This usually happens when
38827 some built-in function is expanded. */
38828 if (!target)
38830 rtx t1 = gen_reg_rtx (Pmode);
38831 rtx t2 = gen_reg_rtx (Pmode);
38832 target = gen_reg_rtx (BNDmode);
38833 emit_move_insn (t1, const0_rtx);
38834 emit_move_insn (t2, constm1_rtx);
38835 emit_insn (BNDmode == BND64mode
38836 ? gen_bnd64_mk (target, t1, t2)
38837 : gen_bnd32_mk (target, t1, t2));
38840 gcc_assert (target && REG_P (target));
38841 return target;
38843 case IX86_BUILTIN_BNDNARROW:
38845 rtx m1, m1h1, m1h2, lb, ub, t1;
38847 /* Return value and lb. */
38848 arg0 = CALL_EXPR_ARG (exp, 0);
38849 /* Bounds. */
38850 arg1 = CALL_EXPR_ARG (exp, 1);
38851 /* Size. */
38852 arg2 = CALL_EXPR_ARG (exp, 2);
38854 lb = expand_normal (arg0);
38855 op1 = expand_normal (arg1);
38856 op2 = expand_normal (arg2);
38858 /* Size was passed but we need to use (size - 1) as for bndmk. */
38859 op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38860 NULL_RTX, 1, OPTAB_DIRECT);
38862 /* Add LB to size and invert to get UB. */
38863 op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38864 op2, 1, OPTAB_DIRECT);
38865 ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
38867 if (!register_operand (lb, Pmode))
38868 lb = ix86_zero_extend_to_Pmode (lb);
38869 if (!register_operand (ub, Pmode))
38870 ub = ix86_zero_extend_to_Pmode (ub);
38872 /* We need to move bounds to memory before any computations. */
38873 if (MEM_P (op1))
38874 m1 = op1;
38875 else
38877 m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38878 emit_move_insn (m1, op1);
38881 /* Generate mem expression to be used for access to LB and UB. */
38882 m1h1 = adjust_address (m1, Pmode, 0);
38883 m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38885 t1 = gen_reg_rtx (Pmode);
38887 /* Compute LB. */
38888 emit_move_insn (t1, m1h1);
38889 ix86_emit_move_max (t1, lb);
38890 emit_move_insn (m1h1, t1);
38892 /* Compute UB. UB is stored in 1's complement form. Therefore
38893 we also use max here. */
38894 emit_move_insn (t1, m1h2);
38895 ix86_emit_move_max (t1, ub);
38896 emit_move_insn (m1h2, t1);
38898 op2 = gen_reg_rtx (BNDmode);
38899 emit_move_insn (op2, m1);
38901 return chkp_join_splitted_slot (lb, op2);
38904 case IX86_BUILTIN_BNDINT:
38906 rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38908 if (!target
38909 || GET_MODE (target) != BNDmode
38910 || !register_operand (target, BNDmode))
38911 target = gen_reg_rtx (BNDmode);
38913 arg0 = CALL_EXPR_ARG (exp, 0);
38914 arg1 = CALL_EXPR_ARG (exp, 1);
38916 op0 = expand_normal (arg0);
38917 op1 = expand_normal (arg1);
38919 res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38920 rh1 = adjust_address (res, Pmode, 0);
38921 rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38923 /* Put first bounds to temporaries. */
38924 lb1 = gen_reg_rtx (Pmode);
38925 ub1 = gen_reg_rtx (Pmode);
38926 if (MEM_P (op0))
38928 emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38929 emit_move_insn (ub1, adjust_address (op0, Pmode,
38930 GET_MODE_SIZE (Pmode)));
38932 else
38934 emit_move_insn (res, op0);
38935 emit_move_insn (lb1, rh1);
38936 emit_move_insn (ub1, rh2);
38939 /* Put second bounds to temporaries. */
38940 lb2 = gen_reg_rtx (Pmode);
38941 ub2 = gen_reg_rtx (Pmode);
38942 if (MEM_P (op1))
38944 emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38945 emit_move_insn (ub2, adjust_address (op1, Pmode,
38946 GET_MODE_SIZE (Pmode)));
38948 else
38950 emit_move_insn (res, op1);
38951 emit_move_insn (lb2, rh1);
38952 emit_move_insn (ub2, rh2);
38955 /* Compute LB. */
38956 ix86_emit_move_max (lb1, lb2);
38957 emit_move_insn (rh1, lb1);
38959 /* Compute UB. UB is stored in 1's complement form. Therefore
38960 we also use max here. */
38961 ix86_emit_move_max (ub1, ub2);
38962 emit_move_insn (rh2, ub1);
38964 emit_move_insn (target, res);
38966 return target;
38969 case IX86_BUILTIN_SIZEOF:
38971 tree name;
38972 rtx symbol;
38974 if (!target
38975 || GET_MODE (target) != Pmode
38976 || !register_operand (target, Pmode))
38977 target = gen_reg_rtx (Pmode);
38979 arg0 = CALL_EXPR_ARG (exp, 0);
38980 gcc_assert (TREE_CODE (arg0) == VAR_DECL);
38982 name = DECL_ASSEMBLER_NAME (arg0);
38983 symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
38985 emit_insn (Pmode == SImode
38986 ? gen_move_size_reloc_si (target, symbol)
38987 : gen_move_size_reloc_di (target, symbol));
38989 return target;
38992 case IX86_BUILTIN_BNDLOWER:
38994 rtx mem, hmem;
38996 if (!target
38997 || GET_MODE (target) != Pmode
38998 || !register_operand (target, Pmode))
38999 target = gen_reg_rtx (Pmode);
39001 arg0 = CALL_EXPR_ARG (exp, 0);
39002 op0 = expand_normal (arg0);
39004 /* We need to move bounds to memory first. */
39005 if (MEM_P (op0))
39006 mem = op0;
39007 else
39009 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
39010 emit_move_insn (mem, op0);
39013 /* Generate mem expression to access LB and load it. */
39014 hmem = adjust_address (mem, Pmode, 0);
39015 emit_move_insn (target, hmem);
39017 return target;
39020 case IX86_BUILTIN_BNDUPPER:
39022 rtx mem, hmem, res;
39024 if (!target
39025 || GET_MODE (target) != Pmode
39026 || !register_operand (target, Pmode))
39027 target = gen_reg_rtx (Pmode);
39029 arg0 = CALL_EXPR_ARG (exp, 0);
39030 op0 = expand_normal (arg0);
39032 /* We need to move bounds to memory first. */
39033 if (MEM_P (op0))
39034 mem = op0;
39035 else
39037 mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
39038 emit_move_insn (mem, op0);
39041 /* Generate mem expression to access UB. */
39042 hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
39044 /* We need to invert all bits of UB. */
39045 res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
39047 if (res != target)
39048 emit_move_insn (target, res);
39050 return target;
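/* An illustrative sketch, assuming __builtin___bnd_get_ptr_ubound is
   lowered to IX86_BUILTIN_BNDUPPER under -mmpx -fcheck-pointer-bounds:

     const void *
     upper_bound (const void *p)
     {
       return __builtin___bnd_get_ptr_ubound (p);
     }

   The stored upper bound is in one's complement form, so the NOT above
   recovers the actual address of the last accessible byte.  */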
39053 case IX86_BUILTIN_MASKMOVQ:
39054 case IX86_BUILTIN_MASKMOVDQU:
39055 icode = (fcode == IX86_BUILTIN_MASKMOVQ
39056 ? CODE_FOR_mmx_maskmovq
39057 : CODE_FOR_sse2_maskmovdqu);
39058 /* Note the arg order is different from the operand order. */
39059 arg1 = CALL_EXPR_ARG (exp, 0);
39060 arg2 = CALL_EXPR_ARG (exp, 1);
39061 arg0 = CALL_EXPR_ARG (exp, 2);
39062 op0 = expand_normal (arg0);
39063 op1 = expand_normal (arg1);
39064 op2 = expand_normal (arg2);
39065 mode0 = insn_data[icode].operand[0].mode;
39066 mode1 = insn_data[icode].operand[1].mode;
39067 mode2 = insn_data[icode].operand[2].mode;
39069 op0 = ix86_zero_extend_to_Pmode (op0);
39070 op0 = gen_rtx_MEM (mode1, op0);
39072 if (!insn_data[icode].operand[0].predicate (op0, mode0))
39073 op0 = copy_to_mode_reg (mode0, op0);
39074 if (!insn_data[icode].operand[1].predicate (op1, mode1))
39075 op1 = copy_to_mode_reg (mode1, op1);
39076 if (!insn_data[icode].operand[2].predicate (op2, mode2))
39077 op2 = copy_to_mode_reg (mode2, op2);
39078 pat = GEN_FCN (icode) (op0, op1, op2);
39079 if (! pat)
39080 return 0;
39081 emit_insn (pat);
39082 return 0;
39084 case IX86_BUILTIN_LDMXCSR:
39085 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
39086 target = assign_386_stack_local (SImode, SLOT_TEMP);
39087 emit_move_insn (target, op0);
39088 emit_insn (gen_sse_ldmxcsr (target));
39089 return 0;
39091 case IX86_BUILTIN_STMXCSR:
39092 target = assign_386_stack_local (SImode, SLOT_TEMP);
39093 emit_insn (gen_sse_stmxcsr (target));
39094 return copy_to_mode_reg (SImode, target);
39096 case IX86_BUILTIN_CLFLUSH:
39097 arg0 = CALL_EXPR_ARG (exp, 0);
39098 op0 = expand_normal (arg0);
39099 icode = CODE_FOR_sse2_clflush;
39100 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39101 op0 = ix86_zero_extend_to_Pmode (op0);
39103 emit_insn (gen_sse2_clflush (op0));
39104 return 0;
39106 case IX86_BUILTIN_CLWB:
39107 arg0 = CALL_EXPR_ARG (exp, 0);
39108 op0 = expand_normal (arg0);
39109 icode = CODE_FOR_clwb;
39110 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39111 op0 = ix86_zero_extend_to_Pmode (op0);
39113 emit_insn (gen_clwb (op0));
39114 return 0;
39116 case IX86_BUILTIN_CLFLUSHOPT:
39117 arg0 = CALL_EXPR_ARG (exp, 0);
39118 op0 = expand_normal (arg0);
39119 icode = CODE_FOR_clflushopt;
39120 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39121 op0 = ix86_zero_extend_to_Pmode (op0);
39123 emit_insn (gen_clflushopt (op0));
39124 return 0;
39126 case IX86_BUILTIN_MONITOR:
39127 case IX86_BUILTIN_MONITORX:
39128 arg0 = CALL_EXPR_ARG (exp, 0);
39129 arg1 = CALL_EXPR_ARG (exp, 1);
39130 arg2 = CALL_EXPR_ARG (exp, 2);
39131 op0 = expand_normal (arg0);
39132 op1 = expand_normal (arg1);
39133 op2 = expand_normal (arg2);
39134 if (!REG_P (op0))
39135 op0 = ix86_zero_extend_to_Pmode (op0);
39136 if (!REG_P (op1))
39137 op1 = copy_to_mode_reg (SImode, op1);
39138 if (!REG_P (op2))
39139 op2 = copy_to_mode_reg (SImode, op2);
39141 emit_insn (fcode == IX86_BUILTIN_MONITOR
39142 ? ix86_gen_monitor (op0, op1, op2)
39143 : ix86_gen_monitorx (op0, op1, op2));
39144 return 0;
39146 case IX86_BUILTIN_MWAIT:
39147 arg0 = CALL_EXPR_ARG (exp, 0);
39148 arg1 = CALL_EXPR_ARG (exp, 1);
39149 op0 = expand_normal (arg0);
39150 op1 = expand_normal (arg1);
39151 if (!REG_P (op0))
39152 op0 = copy_to_mode_reg (SImode, op0);
39153 if (!REG_P (op1))
39154 op1 = copy_to_mode_reg (SImode, op1);
39155 emit_insn (gen_sse3_mwait (op0, op1));
39156 return 0;
39158 case IX86_BUILTIN_MWAITX:
39159 arg0 = CALL_EXPR_ARG (exp, 0);
39160 arg1 = CALL_EXPR_ARG (exp, 1);
39161 arg2 = CALL_EXPR_ARG (exp, 2);
39162 op0 = expand_normal (arg0);
39163 op1 = expand_normal (arg1);
39164 op2 = expand_normal (arg2);
39165 if (!REG_P (op0))
39166 op0 = copy_to_mode_reg (SImode, op0);
39167 if (!REG_P (op1))
39168 op1 = copy_to_mode_reg (SImode, op1);
39169 if (!REG_P (op2))
39170 op2 = copy_to_mode_reg (SImode, op2);
39171 emit_insn (gen_mwaitx (op0, op1, op2));
39172 return 0;
39174 case IX86_BUILTIN_VEC_INIT_V2SI:
39175 case IX86_BUILTIN_VEC_INIT_V4HI:
39176 case IX86_BUILTIN_VEC_INIT_V8QI:
39177 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
39179 case IX86_BUILTIN_VEC_EXT_V2DF:
39180 case IX86_BUILTIN_VEC_EXT_V2DI:
39181 case IX86_BUILTIN_VEC_EXT_V4SF:
39182 case IX86_BUILTIN_VEC_EXT_V4SI:
39183 case IX86_BUILTIN_VEC_EXT_V8HI:
39184 case IX86_BUILTIN_VEC_EXT_V2SI:
39185 case IX86_BUILTIN_VEC_EXT_V4HI:
39186 case IX86_BUILTIN_VEC_EXT_V16QI:
39187 return ix86_expand_vec_ext_builtin (exp, target);
39189 case IX86_BUILTIN_VEC_SET_V2DI:
39190 case IX86_BUILTIN_VEC_SET_V4SF:
39191 case IX86_BUILTIN_VEC_SET_V4SI:
39192 case IX86_BUILTIN_VEC_SET_V8HI:
39193 case IX86_BUILTIN_VEC_SET_V4HI:
39194 case IX86_BUILTIN_VEC_SET_V16QI:
39195 return ix86_expand_vec_set_builtin (exp);
39197 case IX86_BUILTIN_INFQ:
39198 case IX86_BUILTIN_HUGE_VALQ:
39200 REAL_VALUE_TYPE inf;
39201 rtx tmp;
39203 real_inf (&inf);
39204 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39206 tmp = validize_mem (force_const_mem (mode, tmp));
39208 if (target == 0)
39209 target = gen_reg_rtx (mode);
39211 emit_move_insn (target, tmp);
39212 return target;
39215 case IX86_BUILTIN_RDPMC:
39216 case IX86_BUILTIN_RDTSC:
39217 case IX86_BUILTIN_RDTSCP:
39219 op0 = gen_reg_rtx (DImode);
39220 op1 = gen_reg_rtx (DImode);
39222 if (fcode == IX86_BUILTIN_RDPMC)
39224 arg0 = CALL_EXPR_ARG (exp, 0);
39225 op2 = expand_normal (arg0);
39226 if (!register_operand (op2, SImode))
39227 op2 = copy_to_mode_reg (SImode, op2);
39229 insn = (TARGET_64BIT
39230 ? gen_rdpmc_rex64 (op0, op1, op2)
39231 : gen_rdpmc (op0, op2));
39232 emit_insn (insn);
39234 else if (fcode == IX86_BUILTIN_RDTSC)
39236 insn = (TARGET_64BIT
39237 ? gen_rdtsc_rex64 (op0, op1)
39238 : gen_rdtsc (op0));
39239 emit_insn (insn);
39241 else
39243 op2 = gen_reg_rtx (SImode);
39245 insn = (TARGET_64BIT
39246 ? gen_rdtscp_rex64 (op0, op1, op2)
39247 : gen_rdtscp (op0, op2));
39248 emit_insn (insn);
39250 arg0 = CALL_EXPR_ARG (exp, 0);
39251 op4 = expand_normal (arg0);
39252 if (!address_operand (op4, VOIDmode))
39254 op4 = convert_memory_address (Pmode, op4);
39255 op4 = copy_addr_to_reg (op4);
39257 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39260 if (target == 0)
39262 /* mode is VOIDmode if __builtin_rd* has been called
39263 without lhs. */
39264 if (mode == VOIDmode)
39265 return target;
39266 target = gen_reg_rtx (mode);
39269 if (TARGET_64BIT)
39271 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39272 op1, 1, OPTAB_DIRECT);
39273 op0 = expand_simple_binop (DImode, IOR, op0, op1,
39274 op0, 1, OPTAB_DIRECT);
39277 emit_move_insn (target, op0);
39278 return target;
39280 case IX86_BUILTIN_FXSAVE:
39281 case IX86_BUILTIN_FXRSTOR:
39282 case IX86_BUILTIN_FXSAVE64:
39283 case IX86_BUILTIN_FXRSTOR64:
39284 case IX86_BUILTIN_FNSTENV:
39285 case IX86_BUILTIN_FLDENV:
39286 mode0 = BLKmode;
39287 switch (fcode)
39289 case IX86_BUILTIN_FXSAVE:
39290 icode = CODE_FOR_fxsave;
39291 break;
39292 case IX86_BUILTIN_FXRSTOR:
39293 icode = CODE_FOR_fxrstor;
39294 break;
39295 case IX86_BUILTIN_FXSAVE64:
39296 icode = CODE_FOR_fxsave64;
39297 break;
39298 case IX86_BUILTIN_FXRSTOR64:
39299 icode = CODE_FOR_fxrstor64;
39300 break;
39301 case IX86_BUILTIN_FNSTENV:
39302 icode = CODE_FOR_fnstenv;
39303 break;
39304 case IX86_BUILTIN_FLDENV:
39305 icode = CODE_FOR_fldenv;
39306 break;
39307 default:
39308 gcc_unreachable ();
39311 arg0 = CALL_EXPR_ARG (exp, 0);
39312 op0 = expand_normal (arg0);
39314 if (!address_operand (op0, VOIDmode))
39316 op0 = convert_memory_address (Pmode, op0);
39317 op0 = copy_addr_to_reg (op0);
39319 op0 = gen_rtx_MEM (mode0, op0);
39321 pat = GEN_FCN (icode) (op0);
39322 if (pat)
39323 emit_insn (pat);
39324 return 0;
39326 case IX86_BUILTIN_XSAVE:
39327 case IX86_BUILTIN_XRSTOR:
39328 case IX86_BUILTIN_XSAVE64:
39329 case IX86_BUILTIN_XRSTOR64:
39330 case IX86_BUILTIN_XSAVEOPT:
39331 case IX86_BUILTIN_XSAVEOPT64:
39332 case IX86_BUILTIN_XSAVES:
39333 case IX86_BUILTIN_XRSTORS:
39334 case IX86_BUILTIN_XSAVES64:
39335 case IX86_BUILTIN_XRSTORS64:
39336 case IX86_BUILTIN_XSAVEC:
39337 case IX86_BUILTIN_XSAVEC64:
39338 arg0 = CALL_EXPR_ARG (exp, 0);
39339 arg1 = CALL_EXPR_ARG (exp, 1);
39340 op0 = expand_normal (arg0);
39341 op1 = expand_normal (arg1);
39343 if (!address_operand (op0, VOIDmode))
39345 op0 = convert_memory_address (Pmode, op0);
39346 op0 = copy_addr_to_reg (op0);
39348 op0 = gen_rtx_MEM (BLKmode, op0);
39350 op1 = force_reg (DImode, op1);
39352 if (TARGET_64BIT)
39354 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39355 NULL, 1, OPTAB_DIRECT);
39356 switch (fcode)
39358 case IX86_BUILTIN_XSAVE:
39359 icode = CODE_FOR_xsave_rex64;
39360 break;
39361 case IX86_BUILTIN_XRSTOR:
39362 icode = CODE_FOR_xrstor_rex64;
39363 break;
39364 case IX86_BUILTIN_XSAVE64:
39365 icode = CODE_FOR_xsave64;
39366 break;
39367 case IX86_BUILTIN_XRSTOR64:
39368 icode = CODE_FOR_xrstor64;
39369 break;
39370 case IX86_BUILTIN_XSAVEOPT:
39371 icode = CODE_FOR_xsaveopt_rex64;
39372 break;
39373 case IX86_BUILTIN_XSAVEOPT64:
39374 icode = CODE_FOR_xsaveopt64;
39375 break;
39376 case IX86_BUILTIN_XSAVES:
39377 icode = CODE_FOR_xsaves_rex64;
39378 break;
39379 case IX86_BUILTIN_XRSTORS:
39380 icode = CODE_FOR_xrstors_rex64;
39381 break;
39382 case IX86_BUILTIN_XSAVES64:
39383 icode = CODE_FOR_xsaves64;
39384 break;
39385 case IX86_BUILTIN_XRSTORS64:
39386 icode = CODE_FOR_xrstors64;
39387 break;
39388 case IX86_BUILTIN_XSAVEC:
39389 icode = CODE_FOR_xsavec_rex64;
39390 break;
39391 case IX86_BUILTIN_XSAVEC64:
39392 icode = CODE_FOR_xsavec64;
39393 break;
39394 default:
39395 gcc_unreachable ();
39398 op2 = gen_lowpart (SImode, op2);
39399 op1 = gen_lowpart (SImode, op1);
39400 pat = GEN_FCN (icode) (op0, op1, op2);
39402 else
39404 switch (fcode)
39406 case IX86_BUILTIN_XSAVE:
39407 icode = CODE_FOR_xsave;
39408 break;
39409 case IX86_BUILTIN_XRSTOR:
39410 icode = CODE_FOR_xrstor;
39411 break;
39412 case IX86_BUILTIN_XSAVEOPT:
39413 icode = CODE_FOR_xsaveopt;
39414 break;
39415 case IX86_BUILTIN_XSAVES:
39416 icode = CODE_FOR_xsaves;
39417 break;
39418 case IX86_BUILTIN_XRSTORS:
39419 icode = CODE_FOR_xrstors;
39420 break;
39421 case IX86_BUILTIN_XSAVEC:
39422 icode = CODE_FOR_xsavec;
39423 break;
39424 default:
39425 gcc_unreachable ();
39427 pat = GEN_FCN (icode) (op0, op1);
39430 if (pat)
39431 emit_insn (pat);
39432 return 0;
39434 case IX86_BUILTIN_LLWPCB:
39435 arg0 = CALL_EXPR_ARG (exp, 0);
39436 op0 = expand_normal (arg0);
39437 icode = CODE_FOR_lwp_llwpcb;
39438 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39439 op0 = ix86_zero_extend_to_Pmode (op0);
39440 emit_insn (gen_lwp_llwpcb (op0));
39441 return 0;
39443 case IX86_BUILTIN_SLWPCB:
39444 icode = CODE_FOR_lwp_slwpcb;
39445 if (!target
39446 || !insn_data[icode].operand[0].predicate (target, Pmode))
39447 target = gen_reg_rtx (Pmode);
39448 emit_insn (gen_lwp_slwpcb (target));
39449 return target;
39451 case IX86_BUILTIN_BEXTRI32:
39452 case IX86_BUILTIN_BEXTRI64:
39453 arg0 = CALL_EXPR_ARG (exp, 0);
39454 arg1 = CALL_EXPR_ARG (exp, 1);
39455 op0 = expand_normal (arg0);
39456 op1 = expand_normal (arg1);
39457 icode = (fcode == IX86_BUILTIN_BEXTRI32
39458 ? CODE_FOR_tbm_bextri_si
39459 : CODE_FOR_tbm_bextri_di);
39460 if (!CONST_INT_P (op1))
39462 error ("last argument must be an immediate");
39463 return const0_rtx;
39465 else
39467 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39468 unsigned char lsb_index = INTVAL (op1) & 0xFF;
39469 op1 = GEN_INT (length);
39470 op2 = GEN_INT (lsb_index);
39471 pat = GEN_FCN (icode) (target, op0, op1, op2);
39472 if (pat)
39473 emit_insn (pat);
39474 return target;
39477 case IX86_BUILTIN_RDRAND16_STEP:
39478 icode = CODE_FOR_rdrandhi_1;
39479 mode0 = HImode;
39480 goto rdrand_step;
39482 case IX86_BUILTIN_RDRAND32_STEP:
39483 icode = CODE_FOR_rdrandsi_1;
39484 mode0 = SImode;
39485 goto rdrand_step;
39487 case IX86_BUILTIN_RDRAND64_STEP:
39488 icode = CODE_FOR_rdranddi_1;
39489 mode0 = DImode;
39491 rdrand_step:
39492 op0 = gen_reg_rtx (mode0);
39493 emit_insn (GEN_FCN (icode) (op0));
39495 arg0 = CALL_EXPR_ARG (exp, 0);
39496 op1 = expand_normal (arg0);
39497 if (!address_operand (op1, VOIDmode))
39499 op1 = convert_memory_address (Pmode, op1);
39500 op1 = copy_addr_to_reg (op1);
39502 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39504 op1 = gen_reg_rtx (SImode);
39505 emit_move_insn (op1, CONST1_RTX (SImode));
39507 /* Emit SImode conditional move. */
39508 if (mode0 == HImode)
39510 op2 = gen_reg_rtx (SImode);
39511 emit_insn (gen_zero_extendhisi2 (op2, op0));
39513 else if (mode0 == SImode)
39514 op2 = op0;
39515 else
39516 op2 = gen_rtx_SUBREG (SImode, op0, 0);
39518 if (target == 0
39519 || !register_operand (target, SImode))
39520 target = gen_reg_rtx (SImode);
39522 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39523 const0_rtx);
39524 emit_insn (gen_rtx_SET (target,
39525 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39526 return target;
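/* An illustrative sketch, assuming the <immintrin.h> definition of
   _rdrand32_step in terms of __builtin_ia32_rdrand32_step:

     #include <immintrin.h>

     int
     get_random (unsigned int *r)
     {
       return _rdrand32_step (r);
     }

   The random value is stored through R, and the conditional move above
   turns the carry flag into the int result: 1 on success, 0 when the
   hardware reports failure.  */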
39528 case IX86_BUILTIN_RDSEED16_STEP:
39529 icode = CODE_FOR_rdseedhi_1;
39530 mode0 = HImode;
39531 goto rdseed_step;
39533 case IX86_BUILTIN_RDSEED32_STEP:
39534 icode = CODE_FOR_rdseedsi_1;
39535 mode0 = SImode;
39536 goto rdseed_step;
39538 case IX86_BUILTIN_RDSEED64_STEP:
39539 icode = CODE_FOR_rdseeddi_1;
39540 mode0 = DImode;
39542 rdseed_step:
39543 op0 = gen_reg_rtx (mode0);
39544 emit_insn (GEN_FCN (icode) (op0));
39546 arg0 = CALL_EXPR_ARG (exp, 0);
39547 op1 = expand_normal (arg0);
39548 if (!address_operand (op1, VOIDmode))
39550 op1 = convert_memory_address (Pmode, op1);
39551 op1 = copy_addr_to_reg (op1);
39553 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39555 op2 = gen_reg_rtx (QImode);
39557 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39558 const0_rtx);
39559 emit_insn (gen_rtx_SET (op2, pat));
39561 if (target == 0
39562 || !register_operand (target, SImode))
39563 target = gen_reg_rtx (SImode);
39565 emit_insn (gen_zero_extendqisi2 (target, op2));
39566 return target;
39568 case IX86_BUILTIN_SBB32:
39569 icode = CODE_FOR_subsi3_carry;
39570 mode0 = SImode;
39571 goto addcarryx;
39573 case IX86_BUILTIN_SBB64:
39574 icode = CODE_FOR_subdi3_carry;
39575 mode0 = DImode;
39576 goto addcarryx;
39578 case IX86_BUILTIN_ADDCARRYX32:
39579 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
39580 mode0 = SImode;
39581 goto addcarryx;
39583 case IX86_BUILTIN_ADDCARRYX64:
39584 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
39585 mode0 = DImode;
39587 addcarryx:
39588 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
39589 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
39590 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
39591 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
39593 op0 = gen_reg_rtx (QImode);
39595 /* Generate CF from input operand. */
39596 op1 = expand_normal (arg0);
39597 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39598 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
39600 /* Generate an ADCX (or ADC) instruction to compute X + Y + CF. */
39601 op2 = expand_normal (arg1);
39602 op3 = expand_normal (arg2);
39604 if (!REG_P (op2))
39605 op2 = copy_to_mode_reg (mode0, op2);
39606 if (!REG_P (op3))
39607 op3 = copy_to_mode_reg (mode0, op3);
39609 op0 = gen_reg_rtx (mode0);
39611 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
39612 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
39613 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
39615 /* Store the result. */
39616 op4 = expand_normal (arg3);
39617 if (!address_operand (op4, VOIDmode))
39619 op4 = convert_memory_address (Pmode, op4);
39620 op4 = copy_addr_to_reg (op4);
39622 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39624 /* Return current CF value. */
39625 if (target == 0)
39626 target = gen_reg_rtx (QImode);
39628 PUT_MODE (pat, QImode);
39629 emit_insn (gen_rtx_SET (target, pat));
39630 return target;
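/* An illustrative sketch, assuming the <x86intrin.h> definition of
   _addcarry_u32 in terms of __builtin_ia32_addcarryx_u32:

     #include <x86intrin.h>

     unsigned char
     add_with_carry (unsigned char c_in, unsigned a, unsigned b,
                     unsigned *sum)
     {
       return _addcarry_u32 (c_in, a, b, sum);
     }

   Adding -1 to the incoming carry byte sets CF exactly when C_IN is
   nonzero; the ADCX/ADC then computes A + B + CF, the sum is stored
   through SUM and the resulting carry flag is returned.  */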
39632 case IX86_BUILTIN_READ_FLAGS:
39633 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39635 if (optimize
39636 || target == NULL_RTX
39637 || !nonimmediate_operand (target, word_mode)
39638 || GET_MODE (target) != word_mode)
39639 target = gen_reg_rtx (word_mode);
39641 emit_insn (gen_pop (target));
39642 return target;
39644 case IX86_BUILTIN_WRITE_FLAGS:
39646 arg0 = CALL_EXPR_ARG (exp, 0);
39647 op0 = expand_normal (arg0);
39648 if (!general_no_elim_operand (op0, word_mode))
39649 op0 = copy_to_mode_reg (word_mode, op0);
39651 emit_insn (gen_push (op0));
39652 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39653 return 0;
39655 case IX86_BUILTIN_KORTESTC16:
39656 icode = CODE_FOR_kortestchi;
39657 mode0 = HImode;
39658 mode1 = CCCmode;
39659 goto kortest;
39661 case IX86_BUILTIN_KORTESTZ16:
39662 icode = CODE_FOR_kortestzhi;
39663 mode0 = HImode;
39664 mode1 = CCZmode;
39666 kortest:
39667 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
39668 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
39669 op0 = expand_normal (arg0);
39670 op1 = expand_normal (arg1);
39672 op0 = copy_to_reg (op0);
39673 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39674 op1 = copy_to_reg (op1);
39675 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39677 target = gen_reg_rtx (QImode);
39678 emit_insn (gen_rtx_SET (target, const0_rtx));
39680 /* Emit kortest. */
39681 emit_insn (GEN_FCN (icode) (op0, op1));
39682 /* And use setcc to return result from flags. */
39683 ix86_expand_setcc (target, EQ,
39684 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39685 return target;
39687 case IX86_BUILTIN_GATHERSIV2DF:
39688 icode = CODE_FOR_avx2_gathersiv2df;
39689 goto gather_gen;
39690 case IX86_BUILTIN_GATHERSIV4DF:
39691 icode = CODE_FOR_avx2_gathersiv4df;
39692 goto gather_gen;
39693 case IX86_BUILTIN_GATHERDIV2DF:
39694 icode = CODE_FOR_avx2_gatherdiv2df;
39695 goto gather_gen;
39696 case IX86_BUILTIN_GATHERDIV4DF:
39697 icode = CODE_FOR_avx2_gatherdiv4df;
39698 goto gather_gen;
39699 case IX86_BUILTIN_GATHERSIV4SF:
39700 icode = CODE_FOR_avx2_gathersiv4sf;
39701 goto gather_gen;
39702 case IX86_BUILTIN_GATHERSIV8SF:
39703 icode = CODE_FOR_avx2_gathersiv8sf;
39704 goto gather_gen;
39705 case IX86_BUILTIN_GATHERDIV4SF:
39706 icode = CODE_FOR_avx2_gatherdiv4sf;
39707 goto gather_gen;
39708 case IX86_BUILTIN_GATHERDIV8SF:
39709 icode = CODE_FOR_avx2_gatherdiv8sf;
39710 goto gather_gen;
39711 case IX86_BUILTIN_GATHERSIV2DI:
39712 icode = CODE_FOR_avx2_gathersiv2di;
39713 goto gather_gen;
39714 case IX86_BUILTIN_GATHERSIV4DI:
39715 icode = CODE_FOR_avx2_gathersiv4di;
39716 goto gather_gen;
39717 case IX86_BUILTIN_GATHERDIV2DI:
39718 icode = CODE_FOR_avx2_gatherdiv2di;
39719 goto gather_gen;
39720 case IX86_BUILTIN_GATHERDIV4DI:
39721 icode = CODE_FOR_avx2_gatherdiv4di;
39722 goto gather_gen;
39723 case IX86_BUILTIN_GATHERSIV4SI:
39724 icode = CODE_FOR_avx2_gathersiv4si;
39725 goto gather_gen;
39726 case IX86_BUILTIN_GATHERSIV8SI:
39727 icode = CODE_FOR_avx2_gathersiv8si;
39728 goto gather_gen;
39729 case IX86_BUILTIN_GATHERDIV4SI:
39730 icode = CODE_FOR_avx2_gatherdiv4si;
39731 goto gather_gen;
39732 case IX86_BUILTIN_GATHERDIV8SI:
39733 icode = CODE_FOR_avx2_gatherdiv8si;
39734 goto gather_gen;
39735 case IX86_BUILTIN_GATHERALTSIV4DF:
39736 icode = CODE_FOR_avx2_gathersiv4df;
39737 goto gather_gen;
39738 case IX86_BUILTIN_GATHERALTDIV8SF:
39739 icode = CODE_FOR_avx2_gatherdiv8sf;
39740 goto gather_gen;
39741 case IX86_BUILTIN_GATHERALTSIV4DI:
39742 icode = CODE_FOR_avx2_gathersiv4di;
39743 goto gather_gen;
39744 case IX86_BUILTIN_GATHERALTDIV8SI:
39745 icode = CODE_FOR_avx2_gatherdiv8si;
39746 goto gather_gen;
39747 case IX86_BUILTIN_GATHER3SIV16SF:
39748 icode = CODE_FOR_avx512f_gathersiv16sf;
39749 goto gather_gen;
39750 case IX86_BUILTIN_GATHER3SIV8DF:
39751 icode = CODE_FOR_avx512f_gathersiv8df;
39752 goto gather_gen;
39753 case IX86_BUILTIN_GATHER3DIV16SF:
39754 icode = CODE_FOR_avx512f_gatherdiv16sf;
39755 goto gather_gen;
39756 case IX86_BUILTIN_GATHER3DIV8DF:
39757 icode = CODE_FOR_avx512f_gatherdiv8df;
39758 goto gather_gen;
39759 case IX86_BUILTIN_GATHER3SIV16SI:
39760 icode = CODE_FOR_avx512f_gathersiv16si;
39761 goto gather_gen;
39762 case IX86_BUILTIN_GATHER3SIV8DI:
39763 icode = CODE_FOR_avx512f_gathersiv8di;
39764 goto gather_gen;
39765 case IX86_BUILTIN_GATHER3DIV16SI:
39766 icode = CODE_FOR_avx512f_gatherdiv16si;
39767 goto gather_gen;
39768 case IX86_BUILTIN_GATHER3DIV8DI:
39769 icode = CODE_FOR_avx512f_gatherdiv8di;
39770 goto gather_gen;
39771 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39772 icode = CODE_FOR_avx512f_gathersiv8df;
39773 goto gather_gen;
39774 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39775 icode = CODE_FOR_avx512f_gatherdiv16sf;
39776 goto gather_gen;
39777 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39778 icode = CODE_FOR_avx512f_gathersiv8di;
39779 goto gather_gen;
39780 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39781 icode = CODE_FOR_avx512f_gatherdiv16si;
39782 goto gather_gen;
39783 case IX86_BUILTIN_GATHER3SIV2DF:
39784 icode = CODE_FOR_avx512vl_gathersiv2df;
39785 goto gather_gen;
39786 case IX86_BUILTIN_GATHER3SIV4DF:
39787 icode = CODE_FOR_avx512vl_gathersiv4df;
39788 goto gather_gen;
39789 case IX86_BUILTIN_GATHER3DIV2DF:
39790 icode = CODE_FOR_avx512vl_gatherdiv2df;
39791 goto gather_gen;
39792 case IX86_BUILTIN_GATHER3DIV4DF:
39793 icode = CODE_FOR_avx512vl_gatherdiv4df;
39794 goto gather_gen;
39795 case IX86_BUILTIN_GATHER3SIV4SF:
39796 icode = CODE_FOR_avx512vl_gathersiv4sf;
39797 goto gather_gen;
39798 case IX86_BUILTIN_GATHER3SIV8SF:
39799 icode = CODE_FOR_avx512vl_gathersiv8sf;
39800 goto gather_gen;
39801 case IX86_BUILTIN_GATHER3DIV4SF:
39802 icode = CODE_FOR_avx512vl_gatherdiv4sf;
39803 goto gather_gen;
39804 case IX86_BUILTIN_GATHER3DIV8SF:
39805 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39806 goto gather_gen;
39807 case IX86_BUILTIN_GATHER3SIV2DI:
39808 icode = CODE_FOR_avx512vl_gathersiv2di;
39809 goto gather_gen;
39810 case IX86_BUILTIN_GATHER3SIV4DI:
39811 icode = CODE_FOR_avx512vl_gathersiv4di;
39812 goto gather_gen;
39813 case IX86_BUILTIN_GATHER3DIV2DI:
39814 icode = CODE_FOR_avx512vl_gatherdiv2di;
39815 goto gather_gen;
39816 case IX86_BUILTIN_GATHER3DIV4DI:
39817 icode = CODE_FOR_avx512vl_gatherdiv4di;
39818 goto gather_gen;
39819 case IX86_BUILTIN_GATHER3SIV4SI:
39820 icode = CODE_FOR_avx512vl_gathersiv4si;
39821 goto gather_gen;
39822 case IX86_BUILTIN_GATHER3SIV8SI:
39823 icode = CODE_FOR_avx512vl_gathersiv8si;
39824 goto gather_gen;
39825 case IX86_BUILTIN_GATHER3DIV4SI:
39826 icode = CODE_FOR_avx512vl_gatherdiv4si;
39827 goto gather_gen;
39828 case IX86_BUILTIN_GATHER3DIV8SI:
39829 icode = CODE_FOR_avx512vl_gatherdiv8si;
39830 goto gather_gen;
39831 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39832 icode = CODE_FOR_avx512vl_gathersiv4df;
39833 goto gather_gen;
39834 case IX86_BUILTIN_GATHER3ALTDIV8SF:
39835 icode = CODE_FOR_avx512vl_gatherdiv8sf;
39836 goto gather_gen;
39837 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39838 icode = CODE_FOR_avx512vl_gathersiv4di;
39839 goto gather_gen;
39840 case IX86_BUILTIN_GATHER3ALTDIV8SI:
39841 icode = CODE_FOR_avx512vl_gatherdiv8si;
39842 goto gather_gen;
39843 case IX86_BUILTIN_SCATTERSIV16SF:
39844 icode = CODE_FOR_avx512f_scattersiv16sf;
39845 goto scatter_gen;
39846 case IX86_BUILTIN_SCATTERSIV8DF:
39847 icode = CODE_FOR_avx512f_scattersiv8df;
39848 goto scatter_gen;
39849 case IX86_BUILTIN_SCATTERDIV16SF:
39850 icode = CODE_FOR_avx512f_scatterdiv16sf;
39851 goto scatter_gen;
39852 case IX86_BUILTIN_SCATTERDIV8DF:
39853 icode = CODE_FOR_avx512f_scatterdiv8df;
39854 goto scatter_gen;
39855 case IX86_BUILTIN_SCATTERSIV16SI:
39856 icode = CODE_FOR_avx512f_scattersiv16si;
39857 goto scatter_gen;
39858 case IX86_BUILTIN_SCATTERSIV8DI:
39859 icode = CODE_FOR_avx512f_scattersiv8di;
39860 goto scatter_gen;
39861 case IX86_BUILTIN_SCATTERDIV16SI:
39862 icode = CODE_FOR_avx512f_scatterdiv16si;
39863 goto scatter_gen;
39864 case IX86_BUILTIN_SCATTERDIV8DI:
39865 icode = CODE_FOR_avx512f_scatterdiv8di;
39866 goto scatter_gen;
39867 case IX86_BUILTIN_SCATTERSIV8SF:
39868 icode = CODE_FOR_avx512vl_scattersiv8sf;
39869 goto scatter_gen;
39870 case IX86_BUILTIN_SCATTERSIV4SF:
39871 icode = CODE_FOR_avx512vl_scattersiv4sf;
39872 goto scatter_gen;
39873 case IX86_BUILTIN_SCATTERSIV4DF:
39874 icode = CODE_FOR_avx512vl_scattersiv4df;
39875 goto scatter_gen;
39876 case IX86_BUILTIN_SCATTERSIV2DF:
39877 icode = CODE_FOR_avx512vl_scattersiv2df;
39878 goto scatter_gen;
39879 case IX86_BUILTIN_SCATTERDIV8SF:
39880 icode = CODE_FOR_avx512vl_scatterdiv8sf;
39881 goto scatter_gen;
39882 case IX86_BUILTIN_SCATTERDIV4SF:
39883 icode = CODE_FOR_avx512vl_scatterdiv4sf;
39884 goto scatter_gen;
39885 case IX86_BUILTIN_SCATTERDIV4DF:
39886 icode = CODE_FOR_avx512vl_scatterdiv4df;
39887 goto scatter_gen;
39888 case IX86_BUILTIN_SCATTERDIV2DF:
39889 icode = CODE_FOR_avx512vl_scatterdiv2df;
39890 goto scatter_gen;
39891 case IX86_BUILTIN_SCATTERSIV8SI:
39892 icode = CODE_FOR_avx512vl_scattersiv8si;
39893 goto scatter_gen;
39894 case IX86_BUILTIN_SCATTERSIV4SI:
39895 icode = CODE_FOR_avx512vl_scattersiv4si;
39896 goto scatter_gen;
39897 case IX86_BUILTIN_SCATTERSIV4DI:
39898 icode = CODE_FOR_avx512vl_scattersiv4di;
39899 goto scatter_gen;
39900 case IX86_BUILTIN_SCATTERSIV2DI:
39901 icode = CODE_FOR_avx512vl_scattersiv2di;
39902 goto scatter_gen;
39903 case IX86_BUILTIN_SCATTERDIV8SI:
39904 icode = CODE_FOR_avx512vl_scatterdiv8si;
39905 goto scatter_gen;
39906 case IX86_BUILTIN_SCATTERDIV4SI:
39907 icode = CODE_FOR_avx512vl_scatterdiv4si;
39908 goto scatter_gen;
39909 case IX86_BUILTIN_SCATTERDIV4DI:
39910 icode = CODE_FOR_avx512vl_scatterdiv4di;
39911 goto scatter_gen;
39912 case IX86_BUILTIN_SCATTERDIV2DI:
39913 icode = CODE_FOR_avx512vl_scatterdiv2di;
39914 goto scatter_gen;
39915 case IX86_BUILTIN_GATHERPFDPD:
39916 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39917 goto vec_prefetch_gen;
39918 case IX86_BUILTIN_GATHERPFDPS:
39919 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39920 goto vec_prefetch_gen;
39921 case IX86_BUILTIN_GATHERPFQPD:
39922 icode = CODE_FOR_avx512pf_gatherpfv8didf;
39923 goto vec_prefetch_gen;
39924 case IX86_BUILTIN_GATHERPFQPS:
39925 icode = CODE_FOR_avx512pf_gatherpfv8disf;
39926 goto vec_prefetch_gen;
39927 case IX86_BUILTIN_SCATTERPFDPD:
39928 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39929 goto vec_prefetch_gen;
39930 case IX86_BUILTIN_SCATTERPFDPS:
39931 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39932 goto vec_prefetch_gen;
39933 case IX86_BUILTIN_SCATTERPFQPD:
39934 icode = CODE_FOR_avx512pf_scatterpfv8didf;
39935 goto vec_prefetch_gen;
39936 case IX86_BUILTIN_SCATTERPFQPS:
39937 icode = CODE_FOR_avx512pf_scatterpfv8disf;
39938 goto vec_prefetch_gen;
39940 gather_gen:
39941 rtx half;
39942 rtx (*gen) (rtx, rtx);
39944 arg0 = CALL_EXPR_ARG (exp, 0);
39945 arg1 = CALL_EXPR_ARG (exp, 1);
39946 arg2 = CALL_EXPR_ARG (exp, 2);
39947 arg3 = CALL_EXPR_ARG (exp, 3);
39948 arg4 = CALL_EXPR_ARG (exp, 4);
39949 op0 = expand_normal (arg0);
39950 op1 = expand_normal (arg1);
39951 op2 = expand_normal (arg2);
39952 op3 = expand_normal (arg3);
39953 op4 = expand_normal (arg4);
39954 /* Note the arg order is different from the operand order. */
39955 mode0 = insn_data[icode].operand[1].mode;
39956 mode2 = insn_data[icode].operand[3].mode;
39957 mode3 = insn_data[icode].operand[4].mode;
39958 mode4 = insn_data[icode].operand[5].mode;
39960 if (target == NULL_RTX
39961 || GET_MODE (target) != insn_data[icode].operand[0].mode
39962 || !insn_data[icode].operand[0].predicate (target,
39963 GET_MODE (target)))
39964 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
39965 else
39966 subtarget = target;
39968 switch (fcode)
39970 case IX86_BUILTIN_GATHER3ALTSIV8DF:
39971 case IX86_BUILTIN_GATHER3ALTSIV8DI:
39972 half = gen_reg_rtx (V8SImode);
39973 if (!nonimmediate_operand (op2, V16SImode))
39974 op2 = copy_to_mode_reg (V16SImode, op2);
39975 emit_insn (gen_vec_extract_lo_v16si (half, op2));
39976 op2 = half;
39977 break;
39978 case IX86_BUILTIN_GATHER3ALTSIV4DF:
39979 case IX86_BUILTIN_GATHER3ALTSIV4DI:
39980 case IX86_BUILTIN_GATHERALTSIV4DF:
39981 case IX86_BUILTIN_GATHERALTSIV4DI:
39982 half = gen_reg_rtx (V4SImode);
39983 if (!nonimmediate_operand (op2, V8SImode))
39984 op2 = copy_to_mode_reg (V8SImode, op2);
39985 emit_insn (gen_vec_extract_lo_v8si (half, op2));
39986 op2 = half;
39987 break;
39988 case IX86_BUILTIN_GATHER3ALTDIV16SF:
39989 case IX86_BUILTIN_GATHER3ALTDIV16SI:
39990 half = gen_reg_rtx (mode0);
39991 if (mode0 == V8SFmode)
39992 gen = gen_vec_extract_lo_v16sf;
39993 else
39994 gen = gen_vec_extract_lo_v16si;
39995 if (!nonimmediate_operand (op0, GET_MODE (op0)))
39996 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
39997 emit_insn (gen (half, op0));
39998 op0 = half;
39999 if (GET_MODE (op3) != VOIDmode)
40001 if (!nonimmediate_operand (op3, GET_MODE (op3)))
40002 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
40003 emit_insn (gen (half, op3));
40004 op3 = half;
40006 break;
40007 case IX86_BUILTIN_GATHER3ALTDIV8SF:
40008 case IX86_BUILTIN_GATHER3ALTDIV8SI:
40009 case IX86_BUILTIN_GATHERALTDIV8SF:
40010 case IX86_BUILTIN_GATHERALTDIV8SI:
40011 half = gen_reg_rtx (mode0);
40012 if (mode0 == V4SFmode)
40013 gen = gen_vec_extract_lo_v8sf;
40014 else
40015 gen = gen_vec_extract_lo_v8si;
40016 if (!nonimmediate_operand (op0, GET_MODE (op0)))
40017 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
40018 emit_insn (gen (half, op0));
40019 op0 = half;
40020 if (GET_MODE (op3) != VOIDmode)
40022 if (!nonimmediate_operand (op3, GET_MODE (op3)))
40023 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
40024 emit_insn (gen (half, op3));
40025 op3 = half;
40027 break;
40028 default:
40029 break;
40032 /* Force the memory operand to use only a base register here. We
40033 don't want to do this to the memory operands of other builtin
40034 functions. */
40035 op1 = ix86_zero_extend_to_Pmode (op1);
40037 if (!insn_data[icode].operand[1].predicate (op0, mode0))
40038 op0 = copy_to_mode_reg (mode0, op0);
40039 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
40040 op1 = copy_to_mode_reg (Pmode, op1);
40041 if (!insn_data[icode].operand[3].predicate (op2, mode2))
40042 op2 = copy_to_mode_reg (mode2, op2);
40044 op3 = fixup_modeless_constant (op3, mode3);
40046 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
40048 if (!insn_data[icode].operand[4].predicate (op3, mode3))
40049 op3 = copy_to_mode_reg (mode3, op3);
40051 else
40053 op3 = copy_to_reg (op3);
40054 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
40056 if (!insn_data[icode].operand[5].predicate (op4, mode4))
40058 error ("the last argument must be scale 1, 2, 4, 8");
40059 return const0_rtx;
40062 /* Optimize. If mask is known to have all high bits set,
40063 replace op0 with pc_rtx to signal that the instruction
40064 overwrites the whole destination and doesn't use its
40065 previous contents. */
40066 if (optimize)
40068 if (TREE_CODE (arg3) == INTEGER_CST)
40070 if (integer_all_onesp (arg3))
40071 op0 = pc_rtx;
40073 else if (TREE_CODE (arg3) == VECTOR_CST)
40075 unsigned int negative = 0;
40076 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
40078 tree cst = VECTOR_CST_ELT (arg3, i);
40079 if (TREE_CODE (cst) == INTEGER_CST
40080 && tree_int_cst_sign_bit (cst))
40081 negative++;
40082 else if (TREE_CODE (cst) == REAL_CST
40083 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
40084 negative++;
40086 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
40087 op0 = pc_rtx;
40089 else if (TREE_CODE (arg3) == SSA_NAME
40090 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
40092 /* Recognize also when mask is like:
40093 __v2df src = _mm_setzero_pd ();
40094 __v2df mask = _mm_cmpeq_pd (src, src);
40096 __v8sf src = _mm256_setzero_ps ();
40097 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
40098 as that is a cheaper way to load all ones into
40099 a register than having to load a constant from
40100 memory. */
40101 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
40102 if (is_gimple_call (def_stmt))
40104 tree fndecl = gimple_call_fndecl (def_stmt);
40105 if (fndecl
40106 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
40107 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
40109 case IX86_BUILTIN_CMPPD:
40110 case IX86_BUILTIN_CMPPS:
40111 case IX86_BUILTIN_CMPPD256:
40112 case IX86_BUILTIN_CMPPS256:
40113 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
40114 break;
40115 /* FALLTHRU */
40116 case IX86_BUILTIN_CMPEQPD:
40117 case IX86_BUILTIN_CMPEQPS:
40118 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
40119 && initializer_zerop (gimple_call_arg (def_stmt,
40120 1)))
40121 op0 = pc_rtx;
40122 break;
40123 default:
40124 break;
40130 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
40131 if (! pat)
40132 return const0_rtx;
40133 emit_insn (pat);
40135 switch (fcode)
40137 case IX86_BUILTIN_GATHER3DIV16SF:
40138 if (target == NULL_RTX)
40139 target = gen_reg_rtx (V8SFmode);
40140 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
40141 break;
40142 case IX86_BUILTIN_GATHER3DIV16SI:
40143 if (target == NULL_RTX)
40144 target = gen_reg_rtx (V8SImode);
40145 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
40146 break;
40147 case IX86_BUILTIN_GATHER3DIV8SF:
40148 case IX86_BUILTIN_GATHERDIV8SF:
40149 if (target == NULL_RTX)
40150 target = gen_reg_rtx (V4SFmode);
40151 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
40152 break;
40153 case IX86_BUILTIN_GATHER3DIV8SI:
40154 case IX86_BUILTIN_GATHERDIV8SI:
40155 if (target == NULL_RTX)
40156 target = gen_reg_rtx (V4SImode);
40157 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
40158 break;
40159 default:
40160 target = subtarget;
40161 break;
40163 return target;
40165 scatter_gen:
40166 arg0 = CALL_EXPR_ARG (exp, 0);
40167 arg1 = CALL_EXPR_ARG (exp, 1);
40168 arg2 = CALL_EXPR_ARG (exp, 2);
40169 arg3 = CALL_EXPR_ARG (exp, 3);
40170 arg4 = CALL_EXPR_ARG (exp, 4);
40171 op0 = expand_normal (arg0);
40172 op1 = expand_normal (arg1);
40173 op2 = expand_normal (arg2);
40174 op3 = expand_normal (arg3);
40175 op4 = expand_normal (arg4);
40176 mode1 = insn_data[icode].operand[1].mode;
40177 mode2 = insn_data[icode].operand[2].mode;
40178 mode3 = insn_data[icode].operand[3].mode;
40179 mode4 = insn_data[icode].operand[4].mode;
40181 /* Force memory operand only with base register here. But we
40182 don't want to do it on memory operand for other builtin
40183 functions. */
40184 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
40186 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40187 op0 = copy_to_mode_reg (Pmode, op0);
40189 op1 = fixup_modeless_constant (op1, mode1);
40191 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
40193 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40194 op1 = copy_to_mode_reg (mode1, op1);
40196 else
40198 op1 = copy_to_reg (op1);
40199 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
40202 if (!insn_data[icode].operand[2].predicate (op2, mode2))
40203 op2 = copy_to_mode_reg (mode2, op2);
40205 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40206 op3 = copy_to_mode_reg (mode3, op3);
40208 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40210 error ("the last argument must be scale 1, 2, 4, 8");
40211 return const0_rtx;
40214 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40215 if (! pat)
40216 return const0_rtx;
40218 emit_insn (pat);
40219 return 0;
40221 vec_prefetch_gen:
40222 arg0 = CALL_EXPR_ARG (exp, 0);
40223 arg1 = CALL_EXPR_ARG (exp, 1);
40224 arg2 = CALL_EXPR_ARG (exp, 2);
40225 arg3 = CALL_EXPR_ARG (exp, 3);
40226 arg4 = CALL_EXPR_ARG (exp, 4);
40227 op0 = expand_normal (arg0);
40228 op1 = expand_normal (arg1);
40229 op2 = expand_normal (arg2);
40230 op3 = expand_normal (arg3);
40231 op4 = expand_normal (arg4);
40232 mode0 = insn_data[icode].operand[0].mode;
40233 mode1 = insn_data[icode].operand[1].mode;
40234 mode3 = insn_data[icode].operand[3].mode;
40235 mode4 = insn_data[icode].operand[4].mode;
40237 op0 = fixup_modeless_constant (op0, mode0);
40239 if (GET_MODE (op0) == mode0
40240 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
40242 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40243 op0 = copy_to_mode_reg (mode0, op0);
40245 else if (op0 != constm1_rtx)
40247 op0 = copy_to_reg (op0);
40248 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40251 if (!insn_data[icode].operand[1].predicate (op1, mode1))
40252 op1 = copy_to_mode_reg (mode1, op1);
40254 /* Force memory operand only with base register here. But we
40255 don't want to do it on memory operand for other builtin
40256 functions. */
40257 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40259 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40260 op2 = copy_to_mode_reg (Pmode, op2);
40262 if (!insn_data[icode].operand[3].predicate (op3, mode3))
40264 error ("the fourth argument must be scale 1, 2, 4, 8");
40265 return const0_rtx;
40268 if (!insn_data[icode].operand[4].predicate (op4, mode4))
40270 error ("incorrect hint operand");
40271 return const0_rtx;
40274 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40275 if (! pat)
40276 return const0_rtx;
40278 emit_insn (pat);
40280 return 0;
40282 case IX86_BUILTIN_XABORT:
40283 icode = CODE_FOR_xabort;
40284 arg0 = CALL_EXPR_ARG (exp, 0);
40285 op0 = expand_normal (arg0);
40286 mode0 = insn_data[icode].operand[0].mode;
40287 if (!insn_data[icode].operand[0].predicate (op0, mode0))
40289 error ("the xabort's argument must be an 8-bit immediate");
40290 return const0_rtx;
40292 emit_insn (gen_xabort (op0));
40293 return 0;
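/* For example, the RTM intrinsic _xabort (0xff) from GCC's rtmintrin.h
   reaches this path; a status argument that is not an 8-bit immediate
   is rejected with the error above. */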
40295 default:
40296 break;
40299 for (i = 0, d = bdesc_special_args;
40300 i < ARRAY_SIZE (bdesc_special_args);
40301 i++, d++)
40302 if (d->code == fcode)
40303 return ix86_expand_special_args_builtin (d, exp, target);
40305 for (i = 0, d = bdesc_args;
40306 i < ARRAY_SIZE (bdesc_args);
40307 i++, d++)
40308 if (d->code == fcode)
40309 switch (fcode)
40311 case IX86_BUILTIN_FABSQ:
40312 case IX86_BUILTIN_COPYSIGNQ:
40313 if (!TARGET_SSE)
40314 /* Emit a normal call if SSE isn't available. */
40315 return expand_call (exp, target, ignore);
40316 default:
40317 return ix86_expand_args_builtin (d, exp, target);
40320 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40321 if (d->code == fcode)
40322 return ix86_expand_sse_comi (d, exp, target);
40324 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40325 if (d->code == fcode)
40326 return ix86_expand_round_builtin (d, exp, target);
40328 for (i = 0, d = bdesc_pcmpestr;
40329 i < ARRAY_SIZE (bdesc_pcmpestr);
40330 i++, d++)
40331 if (d->code == fcode)
40332 return ix86_expand_sse_pcmpestr (d, exp, target);
40334 for (i = 0, d = bdesc_pcmpistr;
40335 i < ARRAY_SIZE (bdesc_pcmpistr);
40336 i++, d++)
40337 if (d->code == fcode)
40338 return ix86_expand_sse_pcmpistr (d, exp, target);
40340 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40341 if (d->code == fcode)
40342 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40343 (enum ix86_builtin_func_type)
40344 d->flag, d->comparison);
40346 gcc_unreachable ();
40349 /* This returns the target-specific builtin with code CODE if
40350 current_function_decl has visibility on this builtin, which is checked
40351 using isa flags. Returns NULL_TREE otherwise. */
40353 static tree ix86_get_builtin (enum ix86_builtins code)
40355 struct cl_target_option *opts;
40356 tree target_tree = NULL_TREE;
40358 /* Determine the isa flags of current_function_decl. */
40360 if (current_function_decl)
40361 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40363 if (target_tree == NULL)
40364 target_tree = target_option_default_node;
40366 opts = TREE_TARGET_OPTION (target_tree);
40368 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40369 return ix86_builtin_decl (code, true);
40370 else
40371 return NULL_TREE;
40374 /* Return function decl for target specific builtin
40375 for given MPX builtin passed in FCODE. */
40376 static tree
40377 ix86_builtin_mpx_function (unsigned fcode)
40379 switch (fcode)
40381 case BUILT_IN_CHKP_BNDMK:
40382 return ix86_builtins[IX86_BUILTIN_BNDMK];
40384 case BUILT_IN_CHKP_BNDSTX:
40385 return ix86_builtins[IX86_BUILTIN_BNDSTX];
40387 case BUILT_IN_CHKP_BNDLDX:
40388 return ix86_builtins[IX86_BUILTIN_BNDLDX];
40390 case BUILT_IN_CHKP_BNDCL:
40391 return ix86_builtins[IX86_BUILTIN_BNDCL];
40393 case BUILT_IN_CHKP_BNDCU:
40394 return ix86_builtins[IX86_BUILTIN_BNDCU];
40396 case BUILT_IN_CHKP_BNDRET:
40397 return ix86_builtins[IX86_BUILTIN_BNDRET];
40399 case BUILT_IN_CHKP_INTERSECT:
40400 return ix86_builtins[IX86_BUILTIN_BNDINT];
40402 case BUILT_IN_CHKP_NARROW:
40403 return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40405 case BUILT_IN_CHKP_SIZEOF:
40406 return ix86_builtins[IX86_BUILTIN_SIZEOF];
40408 case BUILT_IN_CHKP_EXTRACT_LOWER:
40409 return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40411 case BUILT_IN_CHKP_EXTRACT_UPPER:
40412 return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40414 default:
40415 return NULL_TREE;
40418 gcc_unreachable ();
40421 /* Helper function for ix86_load_bounds and ix86_store_bounds.
40423 Return an address to be used to load/store bounds for pointer
40424 passed in SLOT.
40426 SLOT_NO is an integer constant holding number of a target
40427 dependent special slot to be used in case SLOT is not a memory.
40429 SPECIAL_BASE is a pointer to be used as a base of fake address
40430 to access special slots in Bounds Table. SPECIAL_BASE[-1],
40431 SPECIAL_BASE[-2] etc. will be used as fake pointer locations. */
40433 static rtx
40434 ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40436 rtx addr = NULL;
40438 /* NULL slot means we pass bounds for pointer not passed to the
40439 function at all. Register slot means we pass pointer in a
40440 register. In both these cases bounds are passed via Bounds
40441 Table. Since we do not have actual pointer stored in memory,
40442 we have to use fake addresses to access Bounds Table. We
40443 start with (special_base - sizeof (void*)) and decrease this
40444 address by pointer size to get addresses for other slots. */
40445 if (!slot || REG_P (slot))
40447 gcc_assert (CONST_INT_P (slot_no));
40448 addr = plus_constant (Pmode, special_base,
40449 -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
40451 /* If pointer is passed in a memory then its address is used to
40452 access Bounds Table. */
40453 else if (MEM_P (slot))
40455 addr = XEXP (slot, 0);
40456 if (!register_operand (addr, Pmode))
40457 addr = copy_addr_to_reg (addr);
40459 else
40460 gcc_unreachable ();
40462 return addr;
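/* As a worked example: with 64-bit Pmode, a register-passed pointer in
   slot 0 gets the fake address SPECIAL_BASE - 8 and slot 1 gets
   SPECIAL_BASE - 16, i.e. -(INTVAL (slot_no) + 1) * 8 above. */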
40465 /* Expand pass uses this hook to load bounds for function parameter
40466 PTR passed in SLOT in case its bounds are not passed in a register.
40468 If SLOT is a memory, then bounds are loaded as for regular pointer
40469 loaded from memory. PTR may be NULL in case SLOT is a memory.
40470 In such case value of PTR (if required) may be loaded from SLOT.
40472 If SLOT is NULL or a register then SLOT_NO is an integer constant
40473 holding number of the target dependent special slot which should be
40474 used to obtain bounds.
40476 Return loaded bounds. */
40478 static rtx
40479 ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40481 rtx reg = gen_reg_rtx (BNDmode);
40482 rtx addr;
40484 /* Get address to be used to access Bounds Table. Special slots start
40485 at the location of return address of the current function. */
40486 addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40488 /* Load pointer value from a memory if we don't have it. */
40489 if (!ptr)
40491 gcc_assert (MEM_P (slot));
40492 ptr = copy_addr_to_reg (slot);
40495 if (!register_operand (ptr, Pmode))
40496 ptr = ix86_zero_extend_to_Pmode (ptr);
40498 emit_insn (BNDmode == BND64mode
40499 ? gen_bnd64_ldx (reg, addr, ptr)
40500 : gen_bnd32_ldx (reg, addr, ptr));
40502 return reg;
40505 /* Expand pass uses this hook to store BOUNDS for call argument PTR
40506 passed in SLOT in case BOUNDS are not passed in a register.
40508 If SLOT is a memory, then BOUNDS are stored as for regular pointer
40509 stored in memory. PTR may be NULL in case SLOT is a memory.
40510 In such case value of PTR (if required) may be loaded from SLOT.
40512 If SLOT is NULL or a register then SLOT_NO is an integer constant
40513 holding number of the target dependent special slot which should be
40514 used to store BOUNDS. */
40516 static void
40517 ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40519 rtx addr;
40521 /* Get address to be used to access Bounds Table. Special slots start
40522 at the location of return address of a called function. */
40523 addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40525 /* Load pointer value from a memory if we don't have it. */
40526 if (!ptr)
40528 gcc_assert (MEM_P (slot));
40529 ptr = copy_addr_to_reg (slot);
40532 if (!register_operand (ptr, Pmode))
40533 ptr = ix86_zero_extend_to_Pmode (ptr);
40535 gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40536 if (!register_operand (bounds, BNDmode))
40537 bounds = copy_to_mode_reg (BNDmode, bounds);
40539 emit_insn (BNDmode == BND64mode
40540 ? gen_bnd64_stx (addr, ptr, bounds)
40541 : gen_bnd32_stx (addr, ptr, bounds));
40544 /* Load and return bounds returned by function in SLOT. */
40546 static rtx
40547 ix86_load_returned_bounds (rtx slot)
40549 rtx res;
40551 gcc_assert (REG_P (slot));
40552 res = gen_reg_rtx (BNDmode);
40553 emit_move_insn (res, slot);
40555 return res;
40558 /* Store BOUNDS returned by function into SLOT. */
40560 static void
40561 ix86_store_returned_bounds (rtx slot, rtx bounds)
40563 gcc_assert (REG_P (slot));
40564 emit_move_insn (slot, bounds);
40567 /* Returns a function decl for a vectorized version of the builtin function
40568 with builtin function code FN and the result vector type TYPE, or NULL_TREE
40569 if it is not available. */
40571 static tree
40572 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40573 tree type_in)
40575 machine_mode in_mode, out_mode;
40576 int in_n, out_n;
40577 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40579 if (TREE_CODE (type_out) != VECTOR_TYPE
40580 || TREE_CODE (type_in) != VECTOR_TYPE
40581 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40582 return NULL_TREE;
40584 out_mode = TYPE_MODE (TREE_TYPE (type_out));
40585 out_n = TYPE_VECTOR_SUBPARTS (type_out);
40586 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40587 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40589 switch (fn)
40591 case BUILT_IN_SQRT:
40592 if (out_mode == DFmode && in_mode == DFmode)
40594 if (out_n == 2 && in_n == 2)
40595 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40596 else if (out_n == 4 && in_n == 4)
40597 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40598 else if (out_n == 8 && in_n == 8)
40599 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40601 break;
40603 case BUILT_IN_EXP2F:
40604 if (out_mode == SFmode && in_mode == SFmode)
40606 if (out_n == 16 && in_n == 16)
40607 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40609 break;
40611 case BUILT_IN_SQRTF:
40612 if (out_mode == SFmode && in_mode == SFmode)
40614 if (out_n == 4 && in_n == 4)
40615 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40616 else if (out_n == 8 && in_n == 8)
40617 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40618 else if (out_n == 16 && in_n == 16)
40619 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40621 break;
40623 case BUILT_IN_IFLOOR:
40624 case BUILT_IN_LFLOOR:
40625 case BUILT_IN_LLFLOOR:
40626 /* The round insn does not trap on denormals. */
40627 if (flag_trapping_math || !TARGET_ROUND)
40628 break;
40630 if (out_mode == SImode && in_mode == DFmode)
40632 if (out_n == 4 && in_n == 2)
40633 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40634 else if (out_n == 8 && in_n == 4)
40635 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40636 else if (out_n == 16 && in_n == 8)
40637 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40639 break;
40641 case BUILT_IN_IFLOORF:
40642 case BUILT_IN_LFLOORF:
40643 case BUILT_IN_LLFLOORF:
40644 /* The round insn does not trap on denormals. */
40645 if (flag_trapping_math || !TARGET_ROUND)
40646 break;
40648 if (out_mode == SImode && in_mode == SFmode)
40650 if (out_n == 4 && in_n == 4)
40651 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40652 else if (out_n == 8 && in_n == 8)
40653 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40655 break;
40657 case BUILT_IN_ICEIL:
40658 case BUILT_IN_LCEIL:
40659 case BUILT_IN_LLCEIL:
40660 /* The round insn does not trap on denormals. */
40661 if (flag_trapping_math || !TARGET_ROUND)
40662 break;
40664 if (out_mode == SImode && in_mode == DFmode)
40666 if (out_n == 4 && in_n == 2)
40667 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40668 else if (out_n == 8 && in_n == 4)
40669 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40670 else if (out_n == 16 && in_n == 8)
40671 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40673 break;
40675 case BUILT_IN_ICEILF:
40676 case BUILT_IN_LCEILF:
40677 case BUILT_IN_LLCEILF:
40678 /* The round insn does not trap on denormals. */
40679 if (flag_trapping_math || !TARGET_ROUND)
40680 break;
40682 if (out_mode == SImode && in_mode == SFmode)
40684 if (out_n == 4 && in_n == 4)
40685 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40686 else if (out_n == 8 && in_n == 8)
40687 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40689 break;
40691 case BUILT_IN_IRINT:
40692 case BUILT_IN_LRINT:
40693 case BUILT_IN_LLRINT:
40694 if (out_mode == SImode && in_mode == DFmode)
40696 if (out_n == 4 && in_n == 2)
40697 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40698 else if (out_n == 8 && in_n == 4)
40699 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40701 break;
40703 case BUILT_IN_IRINTF:
40704 case BUILT_IN_LRINTF:
40705 case BUILT_IN_LLRINTF:
40706 if (out_mode == SImode && in_mode == SFmode)
40708 if (out_n == 4 && in_n == 4)
40709 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40710 else if (out_n == 8 && in_n == 8)
40711 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40713 break;
40715 case BUILT_IN_IROUND:
40716 case BUILT_IN_LROUND:
40717 case BUILT_IN_LLROUND:
40718 /* The round insn does not trap on denormals. */
40719 if (flag_trapping_math || !TARGET_ROUND)
40720 break;
40722 if (out_mode == SImode && in_mode == DFmode)
40724 if (out_n == 4 && in_n == 2)
40725 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40726 else if (out_n == 8 && in_n == 4)
40727 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40728 else if (out_n == 16 && in_n == 8)
40729 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40731 break;
40733 case BUILT_IN_IROUNDF:
40734 case BUILT_IN_LROUNDF:
40735 case BUILT_IN_LLROUNDF:
40736 /* The round insn does not trap on denormals. */
40737 if (flag_trapping_math || !TARGET_ROUND)
40738 break;
40740 if (out_mode == SImode && in_mode == SFmode)
40742 if (out_n == 4 && in_n == 4)
40743 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40744 else if (out_n == 8 && in_n == 8)
40745 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40747 break;
40749 case BUILT_IN_COPYSIGN:
40750 if (out_mode == DFmode && in_mode == DFmode)
40752 if (out_n == 2 && in_n == 2)
40753 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40754 else if (out_n == 4 && in_n == 4)
40755 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40756 else if (out_n == 8 && in_n == 8)
40757 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40759 break;
40761 case BUILT_IN_COPYSIGNF:
40762 if (out_mode == SFmode && in_mode == SFmode)
40764 if (out_n == 4 && in_n == 4)
40765 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40766 else if (out_n == 8 && in_n == 8)
40767 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40768 else if (out_n == 16 && in_n == 16)
40769 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40771 break;
40773 case BUILT_IN_FLOOR:
40774 /* The round insn does not trap on denormals. */
40775 if (flag_trapping_math || !TARGET_ROUND)
40776 break;
40778 if (out_mode == DFmode && in_mode == DFmode)
40780 if (out_n == 2 && in_n == 2)
40781 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40782 else if (out_n == 4 && in_n == 4)
40783 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40785 break;
40787 case BUILT_IN_FLOORF:
40788 /* The round insn does not trap on denormals. */
40789 if (flag_trapping_math || !TARGET_ROUND)
40790 break;
40792 if (out_mode == SFmode && in_mode == SFmode)
40794 if (out_n == 4 && in_n == 4)
40795 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40796 else if (out_n == 8 && in_n == 8)
40797 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40799 break;
40801 case BUILT_IN_CEIL:
40802 /* The round insn does not trap on denormals. */
40803 if (flag_trapping_math || !TARGET_ROUND)
40804 break;
40806 if (out_mode == DFmode && in_mode == DFmode)
40808 if (out_n == 2 && in_n == 2)
40809 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40810 else if (out_n == 4 && in_n == 4)
40811 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40813 break;
40815 case BUILT_IN_CEILF:
40816 /* The round insn does not trap on denormals. */
40817 if (flag_trapping_math || !TARGET_ROUND)
40818 break;
40820 if (out_mode == SFmode && in_mode == SFmode)
40822 if (out_n == 4 && in_n == 4)
40823 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40824 else if (out_n == 8 && in_n == 8)
40825 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40827 break;
40829 case BUILT_IN_TRUNC:
40830 /* The round insn does not trap on denormals. */
40831 if (flag_trapping_math || !TARGET_ROUND)
40832 break;
40834 if (out_mode == DFmode && in_mode == DFmode)
40836 if (out_n == 2 && in_n == 2)
40837 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40838 else if (out_n == 4 && in_n == 4)
40839 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40841 break;
40843 case BUILT_IN_TRUNCF:
40844 /* The round insn does not trap on denormals. */
40845 if (flag_trapping_math || !TARGET_ROUND)
40846 break;
40848 if (out_mode == SFmode && in_mode == SFmode)
40850 if (out_n == 4 && in_n == 4)
40851 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40852 else if (out_n == 8 && in_n == 8)
40853 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40855 break;
40857 case BUILT_IN_RINT:
40858 /* The round insn does not trap on denormals. */
40859 if (flag_trapping_math || !TARGET_ROUND)
40860 break;
40862 if (out_mode == DFmode && in_mode == DFmode)
40864 if (out_n == 2 && in_n == 2)
40865 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40866 else if (out_n == 4 && in_n == 4)
40867 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40869 break;
40871 case BUILT_IN_RINTF:
40872 /* The round insn does not trap on denormals. */
40873 if (flag_trapping_math || !TARGET_ROUND)
40874 break;
40876 if (out_mode == SFmode && in_mode == SFmode)
40878 if (out_n == 4 && in_n == 4)
40879 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40880 else if (out_n == 8 && in_n == 8)
40881 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40883 break;
40885 case BUILT_IN_ROUND:
40886 /* The round insn does not trap on denormals. */
40887 if (flag_trapping_math || !TARGET_ROUND)
40888 break;
40890 if (out_mode == DFmode && in_mode == DFmode)
40892 if (out_n == 2 && in_n == 2)
40893 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40894 else if (out_n == 4 && in_n == 4)
40895 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40897 break;
40899 case BUILT_IN_ROUNDF:
40900 /* The round insn does not trap on denormals. */
40901 if (flag_trapping_math || !TARGET_ROUND)
40902 break;
40904 if (out_mode == SFmode && in_mode == SFmode)
40906 if (out_n == 4 && in_n == 4)
40907 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40908 else if (out_n == 8 && in_n == 8)
40909 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40911 break;
40913 case BUILT_IN_FMA:
40914 if (out_mode == DFmode && in_mode == DFmode)
40916 if (out_n == 2 && in_n == 2)
40917 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40918 if (out_n == 4 && in_n == 4)
40919 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40921 break;
40923 case BUILT_IN_FMAF:
40924 if (out_mode == SFmode && in_mode == SFmode)
40926 if (out_n == 4 && in_n == 4)
40927 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40928 if (out_n == 8 && in_n == 8)
40929 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40931 break;
40933 default:
40934 break;
40937 /* Dispatch to a handler for a vectorization library. */
40938 if (ix86_veclib_handler)
40939 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40940 type_in);
40942 return NULL_TREE;
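/* As an illustration, a loop such as

     for (i = 0; i < n; i++)
       a[i] = __builtin_floor (b[i]);

   over doubles compiled with -mavx -fno-trapping-math may be vectorized
   through IX86_BUILTIN_FLOORPD256 chosen above (DFmode, 4 elements in
   and out). */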
40945 /* Handler for an SVML-style interface to
40946 a library with vectorized intrinsics. */
40948 static tree
40949 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40951 char name[20];
40952 tree fntype, new_fndecl, args;
40953 unsigned arity;
40954 const char *bname;
40955 machine_mode el_mode, in_mode;
40956 int n, in_n;
40958 /* The SVML is suitable for unsafe math only. */
40959 if (!flag_unsafe_math_optimizations)
40960 return NULL_TREE;
40962 el_mode = TYPE_MODE (TREE_TYPE (type_out));
40963 n = TYPE_VECTOR_SUBPARTS (type_out);
40964 in_mode = TYPE_MODE (TREE_TYPE (type_in));
40965 in_n = TYPE_VECTOR_SUBPARTS (type_in);
40966 if (el_mode != in_mode
40967 || n != in_n)
40968 return NULL_TREE;
40970 switch (fn)
40972 case BUILT_IN_EXP:
40973 case BUILT_IN_LOG:
40974 case BUILT_IN_LOG10:
40975 case BUILT_IN_POW:
40976 case BUILT_IN_TANH:
40977 case BUILT_IN_TAN:
40978 case BUILT_IN_ATAN:
40979 case BUILT_IN_ATAN2:
40980 case BUILT_IN_ATANH:
40981 case BUILT_IN_CBRT:
40982 case BUILT_IN_SINH:
40983 case BUILT_IN_SIN:
40984 case BUILT_IN_ASINH:
40985 case BUILT_IN_ASIN:
40986 case BUILT_IN_COSH:
40987 case BUILT_IN_COS:
40988 case BUILT_IN_ACOSH:
40989 case BUILT_IN_ACOS:
40990 if (el_mode != DFmode || n != 2)
40991 return NULL_TREE;
40992 break;
40994 case BUILT_IN_EXPF:
40995 case BUILT_IN_LOGF:
40996 case BUILT_IN_LOG10F:
40997 case BUILT_IN_POWF:
40998 case BUILT_IN_TANHF:
40999 case BUILT_IN_TANF:
41000 case BUILT_IN_ATANF:
41001 case BUILT_IN_ATAN2F:
41002 case BUILT_IN_ATANHF:
41003 case BUILT_IN_CBRTF:
41004 case BUILT_IN_SINHF:
41005 case BUILT_IN_SINF:
41006 case BUILT_IN_ASINHF:
41007 case BUILT_IN_ASINF:
41008 case BUILT_IN_COSHF:
41009 case BUILT_IN_COSF:
41010 case BUILT_IN_ACOSHF:
41011 case BUILT_IN_ACOSF:
41012 if (el_mode != SFmode || n != 4)
41013 return NULL_TREE;
41014 break;
41016 default:
41017 return NULL_TREE;
41020 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41022 if (fn == BUILT_IN_LOGF)
41023 strcpy (name, "vmlsLn4");
41024 else if (fn == BUILT_IN_LOG)
41025 strcpy (name, "vmldLn2");
41026 else if (n == 4)
41028 sprintf (name, "vmls%s", bname+10);
41029 name[strlen (name)-1] = '4';
41031 else
41032 sprintf (name, "vmld%s2", bname+10);
41034 /* Convert to uppercase. */
41035 name[4] &= ~0x20;
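/* E.g. BUILT_IN_SINF ("__builtin_sinf", n == 4) yields "vmlsSin4" and
   BUILT_IN_SIN (n == 2) yields "vmldSin2". */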
41037 arity = 0;
41038 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41039 args;
41040 args = TREE_CHAIN (args))
41041 arity++;
41043 if (arity == 1)
41044 fntype = build_function_type_list (type_out, type_in, NULL);
41045 else
41046 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41048 /* Build a function declaration for the vectorized function. */
41049 new_fndecl = build_decl (BUILTINS_LOCATION,
41050 FUNCTION_DECL, get_identifier (name), fntype);
41051 TREE_PUBLIC (new_fndecl) = 1;
41052 DECL_EXTERNAL (new_fndecl) = 1;
41053 DECL_IS_NOVOPS (new_fndecl) = 1;
41054 TREE_READONLY (new_fndecl) = 1;
41056 return new_fndecl;
41059 /* Handler for an ACML-style interface to
41060 a library with vectorized intrinsics. */
41062 static tree
41063 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
41065 char name[20] = "__vr.._";
41066 tree fntype, new_fndecl, args;
41067 unsigned arity;
41068 const char *bname;
41069 machine_mode el_mode, in_mode;
41070 int n, in_n;
41072 /* The ACML is 64-bit only and suitable for unsafe math only, as
41073 it does not correctly support parts of IEEE with the required
41074 precision such as denormals. */
41075 if (!TARGET_64BIT
41076 || !flag_unsafe_math_optimizations)
41077 return NULL_TREE;
41079 el_mode = TYPE_MODE (TREE_TYPE (type_out));
41080 n = TYPE_VECTOR_SUBPARTS (type_out);
41081 in_mode = TYPE_MODE (TREE_TYPE (type_in));
41082 in_n = TYPE_VECTOR_SUBPARTS (type_in);
41083 if (el_mode != in_mode
41084 || n != in_n)
41085 return NULL_TREE;
41087 switch (fn)
41089 case BUILT_IN_SIN:
41090 case BUILT_IN_COS:
41091 case BUILT_IN_EXP:
41092 case BUILT_IN_LOG:
41093 case BUILT_IN_LOG2:
41094 case BUILT_IN_LOG10:
41095 name[4] = 'd';
41096 name[5] = '2';
41097 if (el_mode != DFmode
41098 || n != 2)
41099 return NULL_TREE;
41100 break;
41102 case BUILT_IN_SINF:
41103 case BUILT_IN_COSF:
41104 case BUILT_IN_EXPF:
41105 case BUILT_IN_POWF:
41106 case BUILT_IN_LOGF:
41107 case BUILT_IN_LOG2F:
41108 case BUILT_IN_LOG10F:
41109 name[4] = 's';
41110 name[5] = '4';
41111 if (el_mode != SFmode
41112 || n != 4)
41113 return NULL_TREE;
41114 break;
41116 default:
41117 return NULL_TREE;
41120 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41121 sprintf (name + 7, "%s", bname+10);
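/* E.g. BUILT_IN_SIN becomes "__vrd2_sin" and BUILT_IN_SINF becomes
   "__vrs4_sinf". */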
41123 arity = 0;
41124 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41125 args;
41126 args = TREE_CHAIN (args))
41127 arity++;
41129 if (arity == 1)
41130 fntype = build_function_type_list (type_out, type_in, NULL);
41131 else
41132 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41134 /* Build a function declaration for the vectorized function. */
41135 new_fndecl = build_decl (BUILTINS_LOCATION,
41136 FUNCTION_DECL, get_identifier (name), fntype);
41137 TREE_PUBLIC (new_fndecl) = 1;
41138 DECL_EXTERNAL (new_fndecl) = 1;
41139 DECL_IS_NOVOPS (new_fndecl) = 1;
41140 TREE_READONLY (new_fndecl) = 1;
41142 return new_fndecl;
41145 /* Returns a decl of a function that implements gather load with
41146 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
41147 Return NULL_TREE if it is not available. */
41149 static tree
41150 ix86_vectorize_builtin_gather (const_tree mem_vectype,
41151 const_tree index_type, int scale)
41153 bool si;
41154 enum ix86_builtins code;
41156 if (! TARGET_AVX2)
41157 return NULL_TREE;
41159 if ((TREE_CODE (index_type) != INTEGER_TYPE
41160 && !POINTER_TYPE_P (index_type))
41161 || (TYPE_MODE (index_type) != SImode
41162 && TYPE_MODE (index_type) != DImode))
41163 return NULL_TREE;
41165 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
41166 return NULL_TREE;
41168 /* v*gather* insn sign extends index to pointer mode. */
41169 if (TYPE_PRECISION (index_type) < POINTER_SIZE
41170 && TYPE_UNSIGNED (index_type))
41171 return NULL_TREE;
41173 if (scale <= 0
41174 || scale > 8
41175 || (scale & (scale - 1)) != 0)
41176 return NULL_TREE;
41178 si = TYPE_MODE (index_type) == SImode;
41179 switch (TYPE_MODE (mem_vectype))
41181 case V2DFmode:
41182 if (TARGET_AVX512VL)
41183 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
41184 else
41185 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
41186 break;
41187 case V4DFmode:
41188 if (TARGET_AVX512VL)
41189 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
41190 else
41191 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
41192 break;
41193 case V2DImode:
41194 if (TARGET_AVX512VL)
41195 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
41196 else
41197 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
41198 break;
41199 case V4DImode:
41200 if (TARGET_AVX512VL)
41201 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
41202 else
41203 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
41204 break;
41205 case V4SFmode:
41206 if (TARGET_AVX512VL)
41207 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
41208 else
41209 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
41210 break;
41211 case V8SFmode:
41212 if (TARGET_AVX512VL)
41213 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41214 else
41215 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41216 break;
41217 case V4SImode:
41218 if (TARGET_AVX512VL)
41219 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41220 else
41221 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41222 break;
41223 case V8SImode:
41224 if (TARGET_AVX512VL)
41225 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41226 else
41227 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41228 break;
41229 case V8DFmode:
41230 if (TARGET_AVX512F)
41231 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41232 else
41233 return NULL_TREE;
41234 break;
41235 case V8DImode:
41236 if (TARGET_AVX512F)
41237 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41238 else
41239 return NULL_TREE;
41240 break;
41241 case V16SFmode:
41242 if (TARGET_AVX512F)
41243 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41244 else
41245 return NULL_TREE;
41246 break;
41247 case V16SImode:
41248 if (TARGET_AVX512F)
41249 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41250 else
41251 return NULL_TREE;
41252 break;
41253 default:
41254 return NULL_TREE;
41257 return ix86_get_builtin (code);
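/* As an illustration, a gather loop such as

     for (i = 0; i < n; i++)
       a[i] = b[idx[i]];

   with double a[], b[] and int idx[] compiled with -mavx2 may be
   vectorized via the V4DFmode/SImode case above, i.e.
   IX86_BUILTIN_GATHERALTSIV4DF when AVX512VL is not enabled. */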
41260 /* Returns a decl for a target-specific builtin that implements the
41261 reciprocal of the function FN, or NULL_TREE if not available. */
41263 static tree
41264 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41266 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41267 && flag_finite_math_only && !flag_trapping_math
41268 && flag_unsafe_math_optimizations))
41269 return NULL_TREE;
41271 if (md_fn)
41272 /* Machine dependent builtins. */
41273 switch (fn)
41275 /* Vectorized version of sqrt to rsqrt conversion. */
41276 case IX86_BUILTIN_SQRTPS_NR:
41277 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41279 case IX86_BUILTIN_SQRTPS_NR256:
41280 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41282 default:
41283 return NULL_TREE;
41285 else
41286 /* Normal builtins. */
41287 switch (fn)
41289 /* Sqrt to rsqrt conversion. */
41290 case BUILT_IN_SQRTF:
41291 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41293 default:
41294 return NULL_TREE;
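/* For instance, with -ffast-math (finite, non-trapping, unsafe math) a
   scalar 1.0f / __builtin_sqrtf (x) can be rewritten via
   IX86_BUILTIN_RSQRTF, and a vectorized V4SF sqrt via
   IX86_BUILTIN_RSQRTPS_NR; the _NR variants get a Newton-Raphson
   refinement step when expanded. */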
41298 /* Helper for avx_vpermilps256_operand et al. This is also used by
41299 the expansion functions to turn the parallel back into a mask.
41300 The return value is 0 for no match and the imm8+1 for a match. */
41302 int
41303 avx_vpermilp_parallel (rtx par, machine_mode mode)
41305 unsigned i, nelt = GET_MODE_NUNITS (mode);
41306 unsigned mask = 0;
41307 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
41309 if (XVECLEN (par, 0) != (int) nelt)
41310 return 0;
41312 /* Validate that all of the elements are constants, and not totally
41313 out of range. Copy the data into an integral array to make the
41314 subsequent checks easier. */
41315 for (i = 0; i < nelt; ++i)
41317 rtx er = XVECEXP (par, 0, i);
41318 unsigned HOST_WIDE_INT ei;
41320 if (!CONST_INT_P (er))
41321 return 0;
41322 ei = INTVAL (er);
41323 if (ei >= nelt)
41324 return 0;
41325 ipar[i] = ei;
41328 switch (mode)
41330 case V8DFmode:
41331 /* In the 512-bit DFmode case, we can only move elements within
41332 a 128-bit lane. First fill the second part of the mask,
41333 then fallthru. */
41334 for (i = 4; i < 6; ++i)
41336 if (ipar[i] < 4 || ipar[i] >= 6)
41337 return 0;
41338 mask |= (ipar[i] - 4) << i;
41340 for (i = 6; i < 8; ++i)
41342 if (ipar[i] < 6)
41343 return 0;
41344 mask |= (ipar[i] - 6) << i;
41346 /* FALLTHRU */
41348 case V4DFmode:
41349 /* In the 256-bit DFmode case, we can only move elements within
41350 a 128-bit lane. */
41351 for (i = 0; i < 2; ++i)
41353 if (ipar[i] >= 2)
41354 return 0;
41355 mask |= ipar[i] << i;
41357 for (i = 2; i < 4; ++i)
41359 if (ipar[i] < 2)
41360 return 0;
41361 mask |= (ipar[i] - 2) << i;
41363 break;
41365 case V16SFmode:
41366 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
41367 must mirror the permutation in the lower 256 bits. */
41368 for (i = 0; i < 8; ++i)
41369 if (ipar[i] + 8 != ipar[i + 8])
41370 return 0;
41371 /* FALLTHRU */
41373 case V8SFmode:
41374 /* In the 256-bit SFmode case, we have full freedom of
41375 movement within the low 128-bit lane, but the high 128-bit
41376 lane must mirror the exact same pattern. */
41377 for (i = 0; i < 4; ++i)
41378 if (ipar[i] + 4 != ipar[i + 4])
41379 return 0;
41380 nelt = 4;
41381 /* FALLTHRU */
41383 case V2DFmode:
41384 case V4SFmode:
41385 /* In the 128-bit case, we've full freedom in the placement of
41386 the elements from the source operand. */
41387 for (i = 0; i < nelt; ++i)
41388 mask |= ipar[i] << (i * (nelt / 2));
41389 break;
41391 default:
41392 gcc_unreachable ();
41395 /* Make sure success has a non-zero value by adding one. */
41396 return mask + 1;
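/* For example, for V4SFmode a parallel selecting elements (0 3 2 1)
   produces imm8 0x6c (0 | 3<<2 | 2<<4 | 1<<6), so the return value is
   0x6d. */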
41399 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
41400 the expansion functions to turn the parallel back into a mask.
41401 The return value is 0 for no match and the imm8+1 for a match. */
41403 int
41404 avx_vperm2f128_parallel (rtx par, machine_mode mode)
41406 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41407 unsigned mask = 0;
41408 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
41410 if (XVECLEN (par, 0) != (int) nelt)
41411 return 0;
41413 /* Validate that all of the elements are constants, and not totally
41414 out of range. Copy the data into an integral array to make the
41415 subsequent checks easier. */
41416 for (i = 0; i < nelt; ++i)
41418 rtx er = XVECEXP (par, 0, i);
41419 unsigned HOST_WIDE_INT ei;
41421 if (!CONST_INT_P (er))
41422 return 0;
41423 ei = INTVAL (er);
41424 if (ei >= 2 * nelt)
41425 return 0;
41426 ipar[i] = ei;
41429 /* Validate that each half of the permute selects consecutive elements. */
41430 for (i = 0; i < nelt2 - 1; ++i)
41431 if (ipar[i] + 1 != ipar[i + 1])
41432 return 0;
41433 for (i = nelt2; i < nelt - 1; ++i)
41434 if (ipar[i] + 1 != ipar[i + 1])
41435 return 0;
41437 /* Reconstruct the mask. */
41438 for (i = 0; i < 2; ++i)
41440 unsigned e = ipar[i * nelt2];
41441 if (e % nelt2)
41442 return 0;
41443 e /= nelt2;
41444 mask |= e << (i * 4);
41447 /* Make sure success has a non-zero value by adding one. */
41448 return mask + 1;
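/* For example, for V4DFmode the parallel (2 3 4 5), i.e. the high half
   of the first operand followed by the low half of the second,
   reconstructs imm8 0x21, so the return value is 0x22. */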
41451 /* Return a register priority for hard reg REGNO. */
41452 static int
41453 ix86_register_priority (int hard_regno)
41455 /* ebp and r13 as the base always want a displacement, r12 as the
41456 base always wants an index. So discourage their usage in an
41457 address. */
41458 if (hard_regno == R12_REG || hard_regno == R13_REG)
41459 return 0;
41460 if (hard_regno == BP_REG)
41461 return 1;
41462 /* New x86-64 int registers result in bigger code size. Discourage
41463 them. */
41464 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41465 return 2;
41466 /* New x86-64 SSE registers result in bigger code size. Discourage
41467 them. */
41468 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41469 return 2;
41470 /* Usage of AX register results in smaller code. Prefer it. */
41471 if (hard_regno == AX_REG)
41472 return 4;
41473 return 3;
41476 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
41478 Put float CONST_DOUBLE in the constant pool instead of fp regs.
41479 QImode must go into class Q_REGS.
41480 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
41481 movdf to do mem-to-mem moves through integer regs. */
41483 static reg_class_t
41484 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41486 machine_mode mode = GET_MODE (x);
41488 /* We're only allowed to return a subclass of CLASS. Many of the
41489 following checks fail for NO_REGS, so eliminate that early. */
41490 if (regclass == NO_REGS)
41491 return NO_REGS;
41493 /* All classes can load zeros. */
41494 if (x == CONST0_RTX (mode))
41495 return regclass;
41497 /* Force constants into memory if we are loading a (nonzero) constant into
41498 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
41499 instructions to load from a constant. */
41500 if (CONSTANT_P (x)
41501 && (MAYBE_MMX_CLASS_P (regclass)
41502 || MAYBE_SSE_CLASS_P (regclass)
41503 || MAYBE_MASK_CLASS_P (regclass)))
41504 return NO_REGS;
41506 /* Prefer SSE regs only, if we can use them for math. */
41507 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41508 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41510 /* Floating-point constants need more complex checks. */
41511 if (CONST_DOUBLE_P (x))
41513 /* General regs can load everything. */
41514 if (reg_class_subset_p (regclass, GENERAL_REGS))
41515 return regclass;
41517 /* Floats can load 0 and 1 plus some others. Note that we eliminated
41518 zero above. We only want to wind up preferring 80387 registers if
41519 we plan on doing computation with them. */
41520 if (TARGET_80387
41521 && standard_80387_constant_p (x) > 0)
41523 /* Limit class to non-sse. */
41524 if (regclass == FLOAT_SSE_REGS)
41525 return FLOAT_REGS;
41526 if (regclass == FP_TOP_SSE_REGS)
41527 return FP_TOP_REG;
41528 if (regclass == FP_SECOND_SSE_REGS)
41529 return FP_SECOND_REG;
41530 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41531 return regclass;
41534 return NO_REGS;
41537 /* Generally when we see PLUS here, it's the function invariant
41538 (plus soft-fp const_int). Which can only be computed into general
41539 regs. */
41540 if (GET_CODE (x) == PLUS)
41541 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41543 /* QImode constants are easy to load, but non-constant QImode data
41544 must go into Q_REGS. */
41545 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41547 if (reg_class_subset_p (regclass, Q_REGS))
41548 return regclass;
41549 if (reg_class_subset_p (Q_REGS, regclass))
41550 return Q_REGS;
41551 return NO_REGS;
41554 return regclass;
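/* For example, reloading (const_double:DF 1.0) into SSE_REGS returns
   NO_REGS above, forcing the constant into the constant pool; with x87
   math the same constant is allowed in FLOAT_REGS because
   standard_80387_constant_p recognizes 1.0 (fld1). */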
41557 /* Discourage putting floating-point values in SSE registers unless
41558 SSE math is being used, and likewise for the 387 registers. */
41559 static reg_class_t
41560 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41562 machine_mode mode = GET_MODE (x);
41564 /* Restrict the output reload class to the register bank that we are doing
41565 math on. If we would like not to return a subset of CLASS, reject this
41566 alternative: if reload cannot do this, it will still use its choice. */
41567 mode = GET_MODE (x);
41568 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41569 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41571 if (X87_FLOAT_MODE_P (mode))
41573 if (regclass == FP_TOP_SSE_REGS)
41574 return FP_TOP_REG;
41575 else if (regclass == FP_SECOND_SSE_REGS)
41576 return FP_SECOND_REG;
41577 else
41578 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41581 return regclass;
41584 static reg_class_t
41585 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41586 machine_mode mode, secondary_reload_info *sri)
41588 /* Double-word spills from general registers to non-offsettable memory
41589 references (zero-extended addresses) require special handling. */
41590 if (TARGET_64BIT
41591 && MEM_P (x)
41592 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41593 && INTEGER_CLASS_P (rclass)
41594 && !offsettable_memref_p (x))
41596 sri->icode = (in_p
41597 ? CODE_FOR_reload_noff_load
41598 : CODE_FOR_reload_noff_store);
41599 /* Add the cost of moving address to a temporary. */
41600 sri->extra_cost = 1;
41602 return NO_REGS;
41605 /* QImode spills from non-QI registers require
41606 intermediate register on 32bit targets. */
41607 if (mode == QImode
41608 && (MAYBE_MASK_CLASS_P (rclass)
41609 || (!TARGET_64BIT && !in_p
41610 && INTEGER_CLASS_P (rclass)
41611 && MAYBE_NON_Q_CLASS_P (rclass))))
41613 int regno;
41615 if (REG_P (x))
41616 regno = REGNO (x);
41617 else
41618 regno = -1;
41620 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41621 regno = true_regnum (x);
41623 /* Return Q_REGS if the operand is in memory. */
41624 if (regno == -1)
41625 return Q_REGS;
41628 /* This condition handles corner case where an expression involving
41629 pointers gets vectorized. We're trying to use the address of a
41630 stack slot as a vector initializer.
41632 (set (reg:V2DI 74 [ vect_cst_.2 ])
41633 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41635 Eventually frame gets turned into sp+offset like this:
41637 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41638 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41639 (const_int 392 [0x188]))))
41641 That later gets turned into:
41643 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41644 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41645 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41647 We'll have the following reload recorded:
41649 Reload 0: reload_in (DI) =
41650 (plus:DI (reg/f:DI 7 sp)
41651 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41652 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41653 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41654 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41655 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41656 reload_reg_rtx: (reg:V2DI 22 xmm1)
41658 Which isn't going to work since SSE instructions can't handle scalar
41659 additions. Returning GENERAL_REGS forces the addition into integer
41660 register and reload can handle subsequent reloads without problems. */
41662 if (in_p && GET_CODE (x) == PLUS
41663 && SSE_CLASS_P (rclass)
41664 && SCALAR_INT_MODE_P (mode))
41665 return GENERAL_REGS;
41667 return NO_REGS;
41670 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
41672 static bool
41673 ix86_class_likely_spilled_p (reg_class_t rclass)
41675 switch (rclass)
41677 case AREG:
41678 case DREG:
41679 case CREG:
41680 case BREG:
41681 case AD_REGS:
41682 case SIREG:
41683 case DIREG:
41684 case SSE_FIRST_REG:
41685 case FP_TOP_REG:
41686 case FP_SECOND_REG:
41687 case BND_REGS:
41688 return true;
41690 default:
41691 break;
41694 return false;
41697 /* If we are copying between general and FP registers, we need a memory
41698 location. The same is true for SSE and MMX registers.
41700 To optimize register_move_cost performance, allow inline variant.
41702 The macro can't work reliably when one of the CLASSES is a class containing
41703 registers from multiple units (SSE, MMX, integer). We avoid this by never
41704 combining those units in a single alternative in the machine description.
41705 Ensure that this constraint holds to avoid unexpected surprises.
41707 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41708 enforce these sanity checks. */
41710 static inline bool
41711 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41712 machine_mode mode, int strict)
41714 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41715 return false;
41716 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41717 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41718 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41719 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41720 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41721 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41723 gcc_assert (!strict || lra_in_progress);
41724 return true;
41727 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41728 return true;
41730 /* Between mask and general, we have moves no larger than word size. */
41731 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41732 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
41733 return true;
41735 /* ??? This is a lie. We do have moves between mmx/general, and for
41736 mmx/sse2. But by saying we need secondary memory we discourage the
41737 register allocator from using the mmx registers unless needed. */
41738 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41739 return true;
41741 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41743 /* SSE1 doesn't have any direct moves from other classes. */
41744 if (!TARGET_SSE2)
41745 return true;
41747 /* If the target says that inter-unit moves are more expensive
41748 than moving through memory, then don't generate them. */
41749 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41750 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41751 return true;
41753 /* Between SSE and general, we have moves no larger than word size. */
41754 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41755 return true;
41758 return false;
41761 bool
41762 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41763 machine_mode mode, int strict)
41765 return inline_secondary_memory_needed (class1, class2, mode, strict);
41768 /* Implement the TARGET_CLASS_MAX_NREGS hook.
41770 On the 80386, this is the size of MODE in words,
41771 except in the FP regs, where a single reg is always enough. */
41773 static unsigned char
41774 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41776 if (MAYBE_INTEGER_CLASS_P (rclass))
41778 if (mode == XFmode)
41779 return (TARGET_64BIT ? 2 : 3);
41780 else if (mode == XCmode)
41781 return (TARGET_64BIT ? 4 : 6);
41782 else
41783 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41785 else
41787 if (COMPLEX_MODE_P (mode))
41788 return 2;
41789 else
41790 return 1;
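/* For example, DImode in GENERAL_REGS needs 2 registers with -m32 and
   1 with -m64, XFmode needs 3 and 2 respectively, and any complex mode
   in an FP or SSE class reports 2. */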
41794 /* Return true if the registers in CLASS cannot represent the change from
41795 modes FROM to TO. */
41797 bool
41798 ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41799 enum reg_class regclass)
41801 if (from == to)
41802 return false;
41804 /* x87 registers can't do subreg at all, as all values are reformatted
41805 to extended precision. */
41806 if (MAYBE_FLOAT_CLASS_P (regclass))
41807 return true;
41809 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41811 /* Vector registers do not support QI or HImode loads. If we don't
41812 disallow a change to these modes, reload will assume it's ok to
41813 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
41814 the vec_dupv4hi pattern. */
41815 if (GET_MODE_SIZE (from) < 4)
41816 return true;
41819 return false;
41822 /* Return the cost of moving data of mode M between a
41823 register and memory. A value of 2 is the default; this cost is
41824 relative to those in `REGISTER_MOVE_COST'.
41826 This function is used extensively by register_move_cost that is used to
41827 build tables at startup. Make it inline in this case.
41828 When IN is 2, return maximum of in and out move cost.
41830 If moving between registers and memory is more expensive than
41831 between two registers, you should define this macro to express the
41832 relative cost.
41834 Also model the increased cost of moving QImode registers in non
41835 Q_REGS classes. */
41837 static inline int
41838 inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41839 int in)
41841 int cost;
41842 if (FLOAT_CLASS_P (regclass))
41844 int index;
41845 switch (mode)
41847 case SFmode:
41848 index = 0;
41849 break;
41850 case DFmode:
41851 index = 1;
41852 break;
41853 case XFmode:
41854 index = 2;
41855 break;
41856 default:
41857 return 100;
41859 if (in == 2)
41860 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41861 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41863 if (SSE_CLASS_P (regclass))
41865 int index;
41866 switch (GET_MODE_SIZE (mode))
41868 case 4:
41869 index = 0;
41870 break;
41871 case 8:
41872 index = 1;
41873 break;
41874 case 16:
41875 index = 2;
41876 break;
41877 default:
41878 return 100;
41880 if (in == 2)
41881 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41882 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41884 if (MMX_CLASS_P (regclass))
41886 int index;
41887 switch (GET_MODE_SIZE (mode))
41889 case 4:
41890 index = 0;
41891 break;
41892 case 8:
41893 index = 1;
41894 break;
41895 default:
41896 return 100;
41898 if (in == 2)
41899 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41900 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41902 switch (GET_MODE_SIZE (mode))
41904 case 1:
41905 if (Q_CLASS_P (regclass) || TARGET_64BIT)
41907 if (!in)
41908 return ix86_cost->int_store[0];
41909 if (TARGET_PARTIAL_REG_DEPENDENCY
41910 && optimize_function_for_speed_p (cfun))
41911 cost = ix86_cost->movzbl_load;
41912 else
41913 cost = ix86_cost->int_load[0];
41914 if (in == 2)
41915 return MAX (cost, ix86_cost->int_store[0]);
41916 return cost;
41918 else
41920 if (in == 2)
41921 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41922 if (in)
41923 return ix86_cost->movzbl_load;
41924 else
41925 return ix86_cost->int_store[0] + 4;
41927 break;
41928 case 2:
41929 if (in == 2)
41930 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41931 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41932 default:
41933 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
41934 if (mode == TFmode)
41935 mode = XFmode;
41936 if (in == 2)
41937 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41938 else if (in)
41939 cost = ix86_cost->int_load[2];
41940 else
41941 cost = ix86_cost->int_store[2];
41942 return (cost * (((int) GET_MODE_SIZE (mode)
41943 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41947 static int
41948 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41949 bool in)
41951 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41955 /* Return the cost of moving data from a register in class CLASS1 to
41956 one in class CLASS2.
41958 It is not required that the cost always equal 2 when FROM is the same as TO;
41959 on some machines it is expensive to move between registers if they are not
41960 general registers. */
41962 static int
41963 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
41964 reg_class_t class2_i)
41966 enum reg_class class1 = (enum reg_class) class1_i;
41967 enum reg_class class2 = (enum reg_class) class2_i;
41969 /* In case we require secondary memory, compute cost of the store followed
41970 by load. In order to avoid bad register allocation choices, we need
41971 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
41973 if (inline_secondary_memory_needed (class1, class2, mode, 0))
41975 int cost = 1;
41977 cost += inline_memory_move_cost (mode, class1, 2);
41978 cost += inline_memory_move_cost (mode, class2, 2);
41980 /* In case of copying from a general purpose register we may emit multiple
41981 stores followed by a single load, causing a memory size mismatch stall.
41982 Count this as an arbitrarily high cost of 20. */
41983 if (targetm.class_max_nregs (class1, mode)
41984 > targetm.class_max_nregs (class2, mode))
41985 cost += 20;
41987 /* In the case of FP/MMX moves, the registers actually overlap, and we
41988 have to switch modes in order to treat them differently. */
41989 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
41990 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
41991 cost += 20;
41993 return cost;
41996 /* Moves between SSE/MMX and integer unit are expensive. */
41997 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
41998 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
42000 /* ??? By keeping returned value relatively high, we limit the number
42001 of moves between integer and MMX/SSE registers for all targets.
42002 Additionally, high value prevents problem with x86_modes_tieable_p(),
42003 where integer modes in MMX/SSE registers are not tieable
42004 because of missing QImode and HImode moves to, from or between
42005 MMX/SSE registers. */
42006 return MAX (8, ix86_cost->mmxsse_to_integer);
42008 if (MAYBE_FLOAT_CLASS_P (class1))
42009 return ix86_cost->fp_move;
42010 if (MAYBE_SSE_CLASS_P (class1))
42011 return ix86_cost->sse_move;
42012 if (MAYBE_MMX_CLASS_P (class1))
42013 return ix86_cost->mmx_move;
42014 return 2;
42017 /* Return TRUE if hard register REGNO can hold a value of machine-mode
42018 MODE. */
42020 bool
42021 ix86_hard_regno_mode_ok (int regno, machine_mode mode)
42023 /* Flags, and only flags, can hold CCmode values. */
42024 if (CC_REGNO_P (regno))
42025 return GET_MODE_CLASS (mode) == MODE_CC;
42026 if (GET_MODE_CLASS (mode) == MODE_CC
42027 || GET_MODE_CLASS (mode) == MODE_RANDOM
42028 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
42029 return false;
42030 if (STACK_REGNO_P (regno))
42031 return VALID_FP_MODE_P (mode);
42032 if (MASK_REGNO_P (regno))
42033 return (VALID_MASK_REG_MODE (mode)
42034 || (TARGET_AVX512BW
42035 && VALID_MASK_AVX512BW_MODE (mode)));
42036 if (BND_REGNO_P (regno))
42037 return VALID_BND_REG_MODE (mode);
42038 if (SSE_REGNO_P (regno))
42040 /* We implement the move patterns for all vector modes into and
42041 out of SSE registers, even when no operation instructions
42042 are available. */
42044 /* For AVX-512 we allow, regardless of regno:
42045 - XI mode
42046 - any of 512-bit wide vector mode
42047 - any scalar mode. */
42048 if (TARGET_AVX512F
42049 && (mode == XImode
42050 || VALID_AVX512F_REG_MODE (mode)
42051 || VALID_AVX512F_SCALAR_MODE (mode)))
42052 return true;
42054 /* TODO check for QI/HI scalars. */
42055 /* AVX512VL allows SSE regs 16+ for 128/256 bit modes. */
42056 if (TARGET_AVX512VL
42057 && (mode == OImode
42058 || mode == TImode
42059 || VALID_AVX256_REG_MODE (mode)
42060 || VALID_AVX512VL_128_REG_MODE (mode)))
42061 return true;
42063 /* xmm16-xmm31 are only available for AVX-512. */
42064 if (EXT_REX_SSE_REGNO_P (regno))
42065 return false;
42067 /* OImode and AVX modes are available only when AVX is enabled. */
42068 return ((TARGET_AVX
42069 && VALID_AVX256_REG_OR_OI_MODE (mode))
42070 || VALID_SSE_REG_MODE (mode)
42071 || VALID_SSE2_REG_MODE (mode)
42072 || VALID_MMX_REG_MODE (mode)
42073 || VALID_MMX_REG_MODE_3DNOW (mode));
42075 if (MMX_REGNO_P (regno))
42077 /* We implement the move patterns for 3DNOW modes even in MMX mode,
42078 so if the register is available at all, then we can move data of
42079 the given mode into or out of it. */
42080 return (VALID_MMX_REG_MODE (mode)
42081 || VALID_MMX_REG_MODE_3DNOW (mode));
42084 if (mode == QImode)
42086 /* Take care for QImode values - they can be in non-QI regs,
42087 but then they do cause partial register stalls. */
42088 if (ANY_QI_REGNO_P (regno))
42089 return true;
42090 if (!TARGET_PARTIAL_REG_STALL)
42091 return true;
42092 /* LRA checks if the hard register is OK for the given mode.
42093 QImode values can live in non-QI regs, so we allow all
42094 registers here. */
42095 if (lra_in_progress)
42096 return true;
42097 return !can_create_pseudo_p ();
42099 /* We handle both integers and floats in the general purpose registers. */
42100 else if (VALID_INT_MODE_P (mode))
42101 return true;
42102 else if (VALID_FP_MODE_P (mode))
42103 return true;
42104 else if (VALID_DFP_MODE_P (mode))
42105 return true;
42106 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
42107 on to use that value in smaller contexts, this can easily force a
42108 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
42109 supporting DImode, allow it. */
42110 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
42111 return true;
42113 return false;
42116 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
42117 tieable integer mode. */
42119 static bool
42120 ix86_tieable_integer_mode_p (machine_mode mode)
42122 switch (mode)
42124 case HImode:
42125 case SImode:
42126 return true;
42128 case QImode:
42129 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
42131 case DImode:
42132 return TARGET_64BIT;
42134 default:
42135 return false;
42139 /* Return true if MODE1 is accessible in a register that can hold MODE2
42140 without copying. That is, all register classes that can hold MODE2
42141 can also hold MODE1. */
42143 bool
42144 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
42146 if (mode1 == mode2)
42147 return true;
42149 if (ix86_tieable_integer_mode_p (mode1)
42150 && ix86_tieable_integer_mode_p (mode2))
42151 return true;
42153 /* MODE2 being XFmode implies fp stack or general regs, which means we
42154 can tie any smaller floating point modes to it. Note that we do not
42155 tie this with TFmode. */
42156 if (mode2 == XFmode)
42157 return mode1 == SFmode || mode1 == DFmode;
42159 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
42160 that we can tie it with SFmode. */
42161 if (mode2 == DFmode)
42162 return mode1 == SFmode;
42164 /* If MODE2 is only appropriate for an SSE register, then tie with
42165 any other mode acceptable to SSE registers. */
42166 if (GET_MODE_SIZE (mode2) == 32
42167 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42168 return (GET_MODE_SIZE (mode1) == 32
42169 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42170 if (GET_MODE_SIZE (mode2) == 16
42171 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42172 return (GET_MODE_SIZE (mode1) == 16
42173 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42175 /* If MODE2 is appropriate for an MMX register, then tie
42176 with any other mode acceptable to MMX registers. */
42177 if (GET_MODE_SIZE (mode2) == 8
42178 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
42179 return (GET_MODE_SIZE (mode1) == 8
42180 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
42182 return false;
42185 /* Return the cost of moving between two registers of mode MODE. */
42187 static int
42188 ix86_set_reg_reg_cost (machine_mode mode)
42190 unsigned int units = UNITS_PER_WORD;
42192 switch (GET_MODE_CLASS (mode))
42194 default:
42195 break;
42197 case MODE_CC:
42198 units = GET_MODE_SIZE (CCmode);
42199 break;
42201 case MODE_FLOAT:
42202 if ((TARGET_SSE && mode == TFmode)
42203 || (TARGET_80387 && mode == XFmode)
42204 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
42205 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
42206 units = GET_MODE_SIZE (mode);
42207 break;
42209 case MODE_COMPLEX_FLOAT:
42210 if ((TARGET_SSE && mode == TCmode)
42211 || (TARGET_80387 && mode == XCmode)
42212 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42213 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42214 units = GET_MODE_SIZE (mode);
42215 break;
42217 case MODE_VECTOR_INT:
42218 case MODE_VECTOR_FLOAT:
42219 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42220 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42221 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42222 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42223 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42224 units = GET_MODE_SIZE (mode);
42227 /* Return the cost of moving between two registers of mode MODE,
42228 assuming that the move will be in pieces of at most UNITS bytes. */
42229 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
42232 /* Compute a (partial) cost for rtx X. Return true if the complete
42233 cost has been computed, and false if subexpressions should be
42234 scanned. In either case, *TOTAL contains the cost result. */
42236 static bool
42237 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
42238 int *total, bool speed)
42240 rtx mask;
42241 enum rtx_code code = GET_CODE (x);
42242 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42243 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42245 switch (code)
42247 case SET:
42248 if (register_operand (SET_DEST (x), VOIDmode)
42249 && reg_or_0_operand (SET_SRC (x), VOIDmode))
42251 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42252 return true;
42254 return false;
42256 case CONST_INT:
42257 case CONST:
42258 case LABEL_REF:
42259 case SYMBOL_REF:
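/* Relative costs for constants: one that is not usable as a
sign-extended 32-bit immediate on x86_64 costs 3, one that is sign-
but not zero-extendable costs 2, most symbolic references under PIC
cost 1, and everything else is treated as free. */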
42260 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42261 *total = 3;
42262 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42263 *total = 2;
42264 else if (flag_pic && SYMBOLIC_CONST (x)
42265 && !(TARGET_64BIT
42266 && (GET_CODE (x) == LABEL_REF
42267 || (GET_CODE (x) == SYMBOL_REF
42268 && SYMBOL_REF_LOCAL_P (x))))
42269 /* Use 0 cost for CONST to improve its propagation. */
42270 && (TARGET_64BIT || GET_CODE (x) != CONST))
42271 *total = 1;
42272 else
42273 *total = 0;
42274 return true;
42276 case CONST_WIDE_INT:
42277 *total = 0;
42278 return true;
42280 case CONST_DOUBLE:
42281 switch (standard_80387_constant_p (x))
42283 case 1: /* 0.0 */
42284 *total = 1;
42285 return true;
42286 default: /* Other constants */
42287 *total = 2;
42288 return true;
42289 case 0:
42290 case -1:
42291 break;
42293 if (SSE_FLOAT_MODE_P (mode))
42295 case CONST_VECTOR:
42296 switch (standard_sse_constant_p (x))
42298 case 0:
42299 break;
42300 case 1: /* 0: xor eliminates false dependency */
42301 *total = 0;
42302 return true;
42303 default: /* -1: cmp contains false dependency */
42304 *total = 1;
42305 return true;
42308 /* Fall back to (MEM (SYMBOL_REF)), since that's where
42309 it'll probably end up. Add a penalty for size. */
42310 *total = (COSTS_N_INSNS (1)
42311 + (flag_pic != 0 && !TARGET_64BIT)
42312 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42313 return true;
42315 case ZERO_EXTEND:
42316 /* The zero extension is often completely free on x86_64, so make
42317 it as cheap as possible. */
42318 if (TARGET_64BIT && mode == DImode
42319 && GET_MODE (XEXP (x, 0)) == SImode)
42320 *total = 1;
42321 else if (TARGET_ZERO_EXTEND_WITH_AND)
42322 *total = cost->add;
42323 else
42324 *total = cost->movzx;
42325 return false;
42327 case SIGN_EXTEND:
42328 *total = cost->movsx;
42329 return false;
42331 case ASHIFT:
42332 if (SCALAR_INT_MODE_P (mode)
42333 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42334 && CONST_INT_P (XEXP (x, 1)))
42336 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42337 if (value == 1)
42339 *total = cost->add;
42340 return false;
42342 if ((value == 2 || value == 3)
42343 && cost->lea <= cost->shift_const)
42345 *total = cost->lea;
42346 return false;
42349 /* FALLTHRU */
42351 case ROTATE:
42352 case ASHIFTRT:
42353 case LSHIFTRT:
42354 case ROTATERT:
42355 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42357 /* ??? Should be SSE vector operation cost. */
42358 /* At least for published AMD latencies, this really is the same
42359 as the latency for a simple fpu operation like fabs. */
42360 /* V*QImode is emulated with 1-11 insns. */
42361 if (mode == V16QImode || mode == V32QImode)
42363 int count = 11;
42364 if (TARGET_XOP && mode == V16QImode)
42366 /* For XOP we use vpshab, which requires a broadcast of the
42367 value to the variable shift insn. For constants this
42368 means a V16Q const in mem; even when we can perform the
42369 shift with one insn set the cost to prefer paddb. */
42370 if (CONSTANT_P (XEXP (x, 1)))
42372 *total = (cost->fabs
42373 + rtx_cost (XEXP (x, 0), mode, code, 0, speed)
42374 + (speed ? 2 : COSTS_N_BYTES (16)));
42375 return true;
42377 count = 3;
42379 else if (TARGET_SSSE3)
42380 count = 7;
42381 *total = cost->fabs * count;
42383 else
42384 *total = cost->fabs;
42386 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
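/* Shift of a value wider than a word: a constant count is modelled as
two word-sized shifts (or one shift plus a couple of fixup insns when
the count exceeds 32); a variable count is much more expensive unless
it is already masked by an AND. */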
42388 if (CONST_INT_P (XEXP (x, 1)))
42390 if (INTVAL (XEXP (x, 1)) > 32)
42391 *total = cost->shift_const + COSTS_N_INSNS (2);
42392 else
42393 *total = cost->shift_const * 2;
42395 else
42397 if (GET_CODE (XEXP (x, 1)) == AND)
42398 *total = cost->shift_var * 2;
42399 else
42400 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42403 else
42405 if (CONST_INT_P (XEXP (x, 1)))
42406 *total = cost->shift_const;
42407 else if (GET_CODE (XEXP (x, 1)) == SUBREG
42408 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42410 /* Return the cost after shift-and truncation. */
42411 *total = cost->shift_var;
42412 return true;
42414 else
42415 *total = cost->shift_var;
42417 return false;
42419 case FMA:
42421 rtx sub;
42423 gcc_assert (FLOAT_MODE_P (mode));
42424 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42426 /* ??? SSE scalar/vector cost should be used here. */
42427 /* ??? Bald assumption that fma has the same cost as fmul. */
42428 *total = cost->fmul;
42429 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
42431 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
42432 sub = XEXP (x, 0);
42433 if (GET_CODE (sub) == NEG)
42434 sub = XEXP (sub, 0);
42435 *total += rtx_cost (sub, mode, FMA, 0, speed);
42437 sub = XEXP (x, 2);
42438 if (GET_CODE (sub) == NEG)
42439 sub = XEXP (sub, 0);
42440 *total += rtx_cost (sub, mode, FMA, 2, speed);
42441 return true;
42444 case MULT:
42445 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42447 /* ??? SSE scalar cost should be used here. */
42448 *total = cost->fmul;
42449 return false;
42451 else if (X87_FLOAT_MODE_P (mode))
42453 *total = cost->fmul;
42454 return false;
42456 else if (FLOAT_MODE_P (mode))
42458 /* ??? SSE vector cost should be used here. */
42459 *total = cost->fmul;
42460 return false;
42462 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42464 /* V*QImode is emulated with 7-13 insns. */
42465 if (mode == V16QImode || mode == V32QImode)
42467 int extra = 11;
42468 if (TARGET_XOP && mode == V16QImode)
42469 extra = 5;
42470 else if (TARGET_SSSE3)
42471 extra = 6;
42472 *total = cost->fmul * 2 + cost->fabs * extra;
42474 /* V*DImode is emulated with 5-8 insns. */
42475 else if (mode == V2DImode || mode == V4DImode)
42477 if (TARGET_XOP && mode == V2DImode)
42478 *total = cost->fmul * 2 + cost->fabs * 3;
42479 else
42480 *total = cost->fmul * 3 + cost->fabs * 5;
42482 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42483 insns, including two PMULUDQ. */
42484 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42485 *total = cost->fmul * 2 + cost->fabs * 5;
42486 else
42487 *total = cost->fmul;
42488 return false;
42490 else
42492 rtx op0 = XEXP (x, 0);
42493 rtx op1 = XEXP (x, 1);
42494 int nbits;
42495 if (CONST_INT_P (XEXP (x, 1)))
42497 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42498 for (nbits = 0; value != 0; value &= value - 1)
42499 nbits++;
42501 else
42502 /* This is arbitrary. */
42503 nbits = 7;
42505 /* Compute costs correctly for widening multiplication. */
42506 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42507 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42508 == GET_MODE_SIZE (mode))
42510 int is_mulwiden = 0;
42511 machine_mode inner_mode = GET_MODE (op0);
42513 if (GET_CODE (op0) == GET_CODE (op1))
42514 is_mulwiden = 1, op1 = XEXP (op1, 0);
42515 else if (CONST_INT_P (op1))
42517 if (GET_CODE (op0) == SIGN_EXTEND)
42518 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42519 == INTVAL (op1);
42520 else
42521 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42524 if (is_mulwiden)
42525 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42528 *total = (cost->mult_init[MODE_INDEX (mode)]
42529 + nbits * cost->mult_bit
42530 + rtx_cost (op0, mode, outer_code, opno, speed)
42531 + rtx_cost (op1, mode, outer_code, opno, speed));
42533 return true;
42536 case DIV:
42537 case UDIV:
42538 case MOD:
42539 case UMOD:
42540 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42541 /* ??? SSE cost should be used here. */
42542 *total = cost->fdiv;
42543 else if (X87_FLOAT_MODE_P (mode))
42544 *total = cost->fdiv;
42545 else if (FLOAT_MODE_P (mode))
42546 /* ??? SSE vector cost should be used here. */
42547 *total = cost->fdiv;
42548 else
42549 *total = cost->divide[MODE_INDEX (mode)];
42550 return false;
42552 case PLUS:
42553 if (GET_MODE_CLASS (mode) == MODE_INT
42554 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42556 if (GET_CODE (XEXP (x, 0)) == PLUS
42557 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42558 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42559 && CONSTANT_P (XEXP (x, 1)))
42561 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42562 if (val == 2 || val == 4 || val == 8)
42564 *total = cost->lea;
42565 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
42566 outer_code, opno, speed);
42567 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
42568 outer_code, opno, speed);
42569 *total += rtx_cost (XEXP (x, 1), mode,
42570 outer_code, opno, speed);
42571 return true;
42574 else if (GET_CODE (XEXP (x, 0)) == MULT
42575 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42577 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42578 if (val == 2 || val == 4 || val == 8)
42580 *total = cost->lea;
42581 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
42582 outer_code, opno, speed);
42583 *total += rtx_cost (XEXP (x, 1), mode,
42584 outer_code, opno, speed);
42585 return true;
42588 else if (GET_CODE (XEXP (x, 0)) == PLUS)
42590 *total = cost->lea;
42591 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
42592 outer_code, opno, speed);
42593 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
42594 outer_code, opno, speed);
42595 *total += rtx_cost (XEXP (x, 1), mode,
42596 outer_code, opno, speed);
42597 return true;
42600 /* FALLTHRU */
42602 case MINUS:
42603 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42605 /* ??? SSE cost should be used here. */
42606 *total = cost->fadd;
42607 return false;
42609 else if (X87_FLOAT_MODE_P (mode))
42611 *total = cost->fadd;
42612 return false;
42614 else if (FLOAT_MODE_P (mode))
42616 /* ??? SSE vector cost should be used here. */
42617 *total = cost->fadd;
42618 return false;
42620 /* FALLTHRU */
42622 case AND:
42623 case IOR:
42624 case XOR:
42625 if (GET_MODE_CLASS (mode) == MODE_INT
42626 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42628 *total = (cost->add * 2
42629 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
42630 << (GET_MODE (XEXP (x, 0)) != DImode))
42631 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
42632 << (GET_MODE (XEXP (x, 1)) != DImode)));
42633 return true;
42635 /* FALLTHRU */
42637 case NEG:
42638 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42640 /* ??? SSE cost should be used here. */
42641 *total = cost->fchs;
42642 return false;
42644 else if (X87_FLOAT_MODE_P (mode))
42646 *total = cost->fchs;
42647 return false;
42649 else if (FLOAT_MODE_P (mode))
42651 /* ??? SSE vector cost should be used here. */
42652 *total = cost->fchs;
42653 return false;
42655 /* FALLTHRU */
42657 case NOT:
42658 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42660 /* ??? Should be SSE vector operation cost. */
42661 /* At least for published AMD latencies, this really is the same
42662 as the latency for a simple fpu operation like fabs. */
42663 *total = cost->fabs;
42665 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42666 *total = cost->add * 2;
42667 else
42668 *total = cost->add;
42669 return false;
42671 case COMPARE:
42672 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42673 && XEXP (XEXP (x, 0), 1) == const1_rtx
42674 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42675 && XEXP (x, 1) == const0_rtx)
42677 /* This kind of construct is implemented using test[bwl].
42678 Treat it as if we had an AND. */
42679 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
42680 *total = (cost->add
42681 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code,
42682 opno, speed)
42683 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
42684 return true;
42687 /* The embedded comparison operand is completely free. */
42688 if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))
42689 && XEXP (x, 1) == const0_rtx)
42690 *total = 0;
42692 return false;
42694 case FLOAT_EXTEND:
42695 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42696 *total = 0;
42697 return false;
42699 case ABS:
42700 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42701 /* ??? SSE cost should be used here. */
42702 *total = cost->fabs;
42703 else if (X87_FLOAT_MODE_P (mode))
42704 *total = cost->fabs;
42705 else if (FLOAT_MODE_P (mode))
42706 /* ??? SSE vector cost should be used here. */
42707 *total = cost->fabs;
42708 return false;
42710 case SQRT:
42711 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42712 /* ??? SSE cost should be used here. */
42713 *total = cost->fsqrt;
42714 else if (X87_FLOAT_MODE_P (mode))
42715 *total = cost->fsqrt;
42716 else if (FLOAT_MODE_P (mode))
42717 /* ??? SSE vector cost should be used here. */
42718 *total = cost->fsqrt;
42719 return false;
42721 case UNSPEC:
42722 if (XINT (x, 1) == UNSPEC_TP)
42723 *total = 0;
42724 return false;
42726 case VEC_SELECT:
42727 case VEC_CONCAT:
42728 case VEC_DUPLICATE:
42729 /* ??? Assume all of these vector manipulation patterns are
42730 recognizable. In which case they all pretty much have the
42731 same cost. */
42732 *total = cost->fabs;
42733 return true;
42734 case VEC_MERGE:
42735 mask = XEXP (x, 2);
42736 /* This is a masked instruction; assume the same cost
42737 as the non-masked variant. */
42738 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42739 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
42740 else
42741 *total = cost->fabs;
42742 return true;
42744 default:
42745 return false;
42749 #if TARGET_MACHO
42751 static int current_machopic_label_num;
42753 /* Given a symbol name and its associated stub, write out the
42754 definition of the stub. */
42756 void
42757 machopic_output_stub (FILE *file, const char *symb, const char *stub)
42759 unsigned int length;
42760 char *binder_name, *symbol_name, lazy_ptr_name[32];
42761 int label = ++current_machopic_label_num;
42763 /* For 64-bit we shouldn't get here. */
42764 gcc_assert (!TARGET_64BIT);
42766 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
42767 symb = targetm.strip_name_encoding (symb);
42769 length = strlen (stub);
42770 binder_name = XALLOCAVEC (char, length + 32);
42771 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42773 length = strlen (symb);
42774 symbol_name = XALLOCAVEC (char, length + 32);
42775 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42777 sprintf (lazy_ptr_name, "L%d$lz", label);
42779 if (MACHOPIC_ATT_STUB)
42780 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42781 else if (MACHOPIC_PURE)
42782 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42783 else
42784 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42786 fprintf (file, "%s:\n", stub);
42787 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42789 if (MACHOPIC_ATT_STUB)
42791 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42793 else if (MACHOPIC_PURE)
42795 /* PIC stub. */
42796 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42797 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42798 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
42799 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42800 label, lazy_ptr_name, label);
42801 fprintf (file, "\tjmp\t*%%ecx\n");
42803 else
42804 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42806 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42807 it needs no stub-binding-helper. */
42808 if (MACHOPIC_ATT_STUB)
42809 return;
42811 fprintf (file, "%s:\n", binder_name);
42813 if (MACHOPIC_PURE)
42815 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42816 fprintf (file, "\tpushl\t%%ecx\n");
42818 else
42819 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42821 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42823 /* N.B. Keep the correspondence of these
42824 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42825 old-pic/new-pic/non-pic stubs; altering this will break
42826 compatibility with existing dylibs. */
42827 if (MACHOPIC_PURE)
42829 /* 25-byte PIC stub using "CALL get_pc_thunk". */
42830 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42832 else
42833 /* 16-byte -mdynamic-no-pic stub. */
42834 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
42836 fprintf (file, "%s:\n", lazy_ptr_name);
42837 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42838 fprintf (file, ASM_LONG "%s\n", binder_name);
42840 #endif /* TARGET_MACHO */
42842 /* Order the registers for register allocator. */
42844 void
42845 x86_order_regs_for_local_alloc (void)
42847 int pos = 0;
42848 int i;
42850 /* First allocate the local general purpose registers. */
42851 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42852 if (GENERAL_REGNO_P (i) && call_used_regs[i])
42853 reg_alloc_order [pos++] = i;
42855 /* Global general purpose registers. */
42856 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42857 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42858 reg_alloc_order [pos++] = i;
42860 /* x87 registers come first in case we are doing FP math
42861 using them. */
42862 if (!TARGET_SSE_MATH)
42863 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42864 reg_alloc_order [pos++] = i;
42866 /* SSE registers. */
42867 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42868 reg_alloc_order [pos++] = i;
42869 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42870 reg_alloc_order [pos++] = i;
42872 /* Extended REX SSE registers. */
42873 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42874 reg_alloc_order [pos++] = i;
42876 /* Mask registers. */
42877 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42878 reg_alloc_order [pos++] = i;
42880 /* MPX bound registers. */
42881 for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42882 reg_alloc_order [pos++] = i;
42884 /* x87 registers. */
42885 if (TARGET_SSE_MATH)
42886 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42887 reg_alloc_order [pos++] = i;
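/* MMX registers. */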
42889 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42890 reg_alloc_order [pos++] = i;
42892 /* Initialize the rest of array as we do not allocate some registers
42893 at all. */
42894 while (pos < FIRST_PSEUDO_REGISTER)
42895 reg_alloc_order [pos++] = 0;
42898 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
42899 in struct attribute_spec handler. */
42900 static tree
42901 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42902 tree args,
42903 int,
42904 bool *no_add_attrs)
42906 if (TREE_CODE (*node) != FUNCTION_TYPE
42907 && TREE_CODE (*node) != METHOD_TYPE
42908 && TREE_CODE (*node) != FIELD_DECL
42909 && TREE_CODE (*node) != TYPE_DECL)
42911 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42912 name);
42913 *no_add_attrs = true;
42914 return NULL_TREE;
42916 if (TARGET_64BIT)
42918 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42919 name);
42920 *no_add_attrs = true;
42921 return NULL_TREE;
42923 if (is_attribute_p ("callee_pop_aggregate_return", name))
42925 tree cst;
42927 cst = TREE_VALUE (args);
42928 if (TREE_CODE (cst) != INTEGER_CST)
42930 warning (OPT_Wattributes,
42931 "%qE attribute requires an integer constant argument",
42932 name);
42933 *no_add_attrs = true;
42935 else if (compare_tree_int (cst, 0) != 0
42936 && compare_tree_int (cst, 1) != 0)
42938 warning (OPT_Wattributes,
42939 "argument to %qE attribute is neither zero, nor one",
42940 name);
42941 *no_add_attrs = true;
42944 return NULL_TREE;
42947 return NULL_TREE;
42950 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
42951 struct attribute_spec.handler. */
42952 static tree
42953 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42954 bool *no_add_attrs)
42956 if (TREE_CODE (*node) != FUNCTION_TYPE
42957 && TREE_CODE (*node) != METHOD_TYPE
42958 && TREE_CODE (*node) != FIELD_DECL
42959 && TREE_CODE (*node) != TYPE_DECL)
42961 warning (OPT_Wattributes, "%qE attribute only applies to functions",
42962 name);
42963 *no_add_attrs = true;
42964 return NULL_TREE;
42967 /* Can combine regparm with all attributes but fastcall. */
42968 if (is_attribute_p ("ms_abi", name))
42970 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
42972 error ("ms_abi and sysv_abi attributes are not compatible");
42975 return NULL_TREE;
42977 else if (is_attribute_p ("sysv_abi", name))
42979 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
42981 error ("ms_abi and sysv_abi attributes are not compatible");
42984 return NULL_TREE;
42987 return NULL_TREE;
42990 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
42991 struct attribute_spec.handler. */
42992 static tree
42993 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
42994 bool *no_add_attrs)
42996 tree *type = NULL;
42997 if (DECL_P (*node))
42999 if (TREE_CODE (*node) == TYPE_DECL)
43000 type = &TREE_TYPE (*node);
43002 else
43003 type = node;
43005 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
43007 warning (OPT_Wattributes, "%qE attribute ignored",
43008 name);
43009 *no_add_attrs = true;
43012 else if ((is_attribute_p ("ms_struct", name)
43013 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
43014 || ((is_attribute_p ("gcc_struct", name)
43015 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
43017 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
43018 name);
43019 *no_add_attrs = true;
43022 return NULL_TREE;
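/* Handle an attribute that may only be applied to function declarations;
arguments as in struct attribute_spec.handler. */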
43025 static tree
43026 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
43027 bool *no_add_attrs)
43029 if (TREE_CODE (*node) != FUNCTION_DECL)
43031 warning (OPT_Wattributes, "%qE attribute only applies to functions",
43032 name);
43033 *no_add_attrs = true;
43035 return NULL_TREE;
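/* Return true if RECORD_TYPE should be laid out with the Microsoft
bitfield rules: either the target defaults to MS layout and the type is
not marked gcc_struct, or the type is explicitly marked ms_struct. */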
43038 static bool
43039 ix86_ms_bitfield_layout_p (const_tree record_type)
43041 return ((TARGET_MS_BITFIELD_LAYOUT
43042 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
43043 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
43046 /* Returns an expression indicating where the this parameter is
43047 located on entry to the FUNCTION. */
43049 static rtx
43050 x86_this_parameter (tree function)
43052 tree type = TREE_TYPE (function);
43053 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
43054 int nregs;
43056 if (TARGET_64BIT)
43058 const int *parm_regs;
43060 if (ix86_function_type_abi (type) == MS_ABI)
43061 parm_regs = x86_64_ms_abi_int_parameter_registers;
43062 else
43063 parm_regs = x86_64_int_parameter_registers;
43064 return gen_rtx_REG (Pmode, parm_regs[aggr]);
43067 nregs = ix86_function_regparm (type, function);
43069 if (nregs > 0 && !stdarg_p (type))
43071 int regno;
43072 unsigned int ccvt = ix86_get_callcvt (type);
43074 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43075 regno = aggr ? DX_REG : CX_REG;
43076 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43078 regno = CX_REG;
43079 if (aggr)
43080 return gen_rtx_MEM (SImode,
43081 plus_constant (Pmode, stack_pointer_rtx, 4));
43083 else
43085 regno = AX_REG;
43086 if (aggr)
43088 regno = DX_REG;
43089 if (nregs == 1)
43090 return gen_rtx_MEM (SImode,
43091 plus_constant (Pmode,
43092 stack_pointer_rtx, 4));
43095 return gen_rtx_REG (SImode, regno);
43098 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
43099 aggr ? 8 : 4));
43102 /* Determine whether x86_output_mi_thunk can succeed. */
43104 static bool
43105 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
43106 const_tree function)
43108 /* 64-bit can handle anything. */
43109 if (TARGET_64BIT)
43110 return true;
43112 /* For 32-bit, everything's fine if we have one free register. */
43113 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
43114 return true;
43116 /* Need a free register for vcall_offset. */
43117 if (vcall_offset)
43118 return false;
43120 /* Need a free register for GOT references. */
43121 if (flag_pic && !targetm.binds_local_p (function))
43122 return false;
43124 /* Otherwise ok. */
43125 return true;
43128 /* Output the assembler code for a thunk function. THUNK_DECL is the
43129 declaration for the thunk function itself, FUNCTION is the decl for
43130 the target function. DELTA is an immediate constant offset to be
43131 added to THIS. If VCALL_OFFSET is nonzero, the word at
43132 *(*this + vcall_offset) should be added to THIS. */
43134 static void
43135 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
43136 HOST_WIDE_INT vcall_offset, tree function)
43138 rtx this_param = x86_this_parameter (function);
43139 rtx this_reg, tmp, fnaddr;
43140 unsigned int tmp_regno;
43141 rtx_insn *insn;
43143 if (TARGET_64BIT)
43144 tmp_regno = R10_REG;
43145 else
43147 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
43148 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43149 tmp_regno = AX_REG;
43150 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43151 tmp_regno = DX_REG;
43152 else
43153 tmp_regno = CX_REG;
43156 emit_note (NOTE_INSN_PROLOGUE_END);
43158 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
43159 pull it in now and let DELTA benefit. */
43160 if (REG_P (this_param))
43161 this_reg = this_param;
43162 else if (vcall_offset)
43164 /* Put the this parameter into %eax. */
43165 this_reg = gen_rtx_REG (Pmode, AX_REG);
43166 emit_move_insn (this_reg, this_param);
43168 else
43169 this_reg = NULL_RTX;
43171 /* Adjust the this parameter by a fixed constant. */
43172 if (delta)
43174 rtx delta_rtx = GEN_INT (delta);
43175 rtx delta_dst = this_reg ? this_reg : this_param;
43177 if (TARGET_64BIT)
43179 if (!x86_64_general_operand (delta_rtx, Pmode))
43181 tmp = gen_rtx_REG (Pmode, tmp_regno);
43182 emit_move_insn (tmp, delta_rtx);
43183 delta_rtx = tmp;
43187 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
43190 /* Adjust the this parameter by a value stored in the vtable. */
43191 if (vcall_offset)
43193 rtx vcall_addr, vcall_mem, this_mem;
43195 tmp = gen_rtx_REG (Pmode, tmp_regno);
43197 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
43198 if (Pmode != ptr_mode)
43199 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
43200 emit_move_insn (tmp, this_mem);
43202 /* Adjust the this parameter. */
43203 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
43204 if (TARGET_64BIT
43205 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
43207 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
43208 emit_move_insn (tmp2, GEN_INT (vcall_offset));
43209 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
43212 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
43213 if (Pmode != ptr_mode)
43214 emit_insn (gen_addsi_1_zext (this_reg,
43215 gen_rtx_REG (ptr_mode,
43216 REGNO (this_reg)),
43217 vcall_mem));
43218 else
43219 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
43222 /* If necessary, drop THIS back to its stack slot. */
43223 if (this_reg && this_reg != this_param)
43224 emit_move_insn (this_param, this_reg);
43226 fnaddr = XEXP (DECL_RTL (function), 0);
43227 if (TARGET_64BIT)
43229 if (!flag_pic || targetm.binds_local_p (function)
43230 || TARGET_PECOFF)
43232 else
43234 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43235 tmp = gen_rtx_CONST (Pmode, tmp);
43236 fnaddr = gen_const_mem (Pmode, tmp);
43239 else
43241 if (!flag_pic || targetm.binds_local_p (function))
43243 #if TARGET_MACHO
43244 else if (TARGET_MACHO)
43246 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43247 fnaddr = XEXP (fnaddr, 0);
43249 #endif /* TARGET_MACHO */
43250 else
43252 tmp = gen_rtx_REG (Pmode, CX_REG);
43253 output_set_got (tmp, NULL_RTX);
43255 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43256 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43257 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43258 fnaddr = gen_const_mem (Pmode, fnaddr);
43262 /* Our sibling call patterns do not allow memories, because we have no
43263 predicate that can distinguish between frame and non-frame memory.
43264 For our purposes here, we can get away with (ab)using a jump pattern,
43265 because we're going to do no optimization. */
43266 if (MEM_P (fnaddr))
43268 if (sibcall_insn_operand (fnaddr, word_mode))
43270 fnaddr = XEXP (DECL_RTL (function), 0);
43271 tmp = gen_rtx_MEM (QImode, fnaddr);
43272 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43273 tmp = emit_call_insn (tmp);
43274 SIBLING_CALL_P (tmp) = 1;
43276 else
43277 emit_jump_insn (gen_indirect_jump (fnaddr));
43279 else
43281 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43283 // CM_LARGE_PIC always uses pseudo PIC register which is
43284 // uninitialized. Since FUNCTION is local and calling it
43285 // doesn't go through PLT, we use scratch register %r11 as
43286 // PIC register and initialize it here.
43287 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43288 ix86_init_large_pic_reg (tmp_regno);
43289 fnaddr = legitimize_pic_address (fnaddr,
43290 gen_rtx_REG (Pmode, tmp_regno));
43293 if (!sibcall_insn_operand (fnaddr, word_mode))
43295 tmp = gen_rtx_REG (word_mode, tmp_regno);
43296 if (GET_MODE (fnaddr) != word_mode)
43297 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43298 emit_move_insn (tmp, fnaddr);
43299 fnaddr = tmp;
43302 tmp = gen_rtx_MEM (QImode, fnaddr);
43303 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43304 tmp = emit_call_insn (tmp);
43305 SIBLING_CALL_P (tmp) = 1;
43307 emit_barrier ();
43309 /* Emit just enough of rest_of_compilation to get the insns emitted.
43310 Note that use_thunk calls assemble_start_function et al. */
43311 insn = get_insns ();
43312 shorten_branches (insn);
43313 final_start_function (insn, file, 1);
43314 final (insn, file, 1);
43315 final_end_function ();
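/* Emit the assembler preamble for the output file: the .code16gcc
directive for 16-bit code, Darwin-specific setup, the optional .version
and __fltused directives, and the Intel syntax switch. */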
43318 static void
43319 x86_file_start (void)
43321 default_file_start ();
43322 if (TARGET_16BIT)
43323 fputs ("\t.code16gcc\n", asm_out_file);
43324 #if TARGET_MACHO
43325 darwin_file_start ();
43326 #endif
43327 if (X86_FILE_START_VERSION_DIRECTIVE)
43328 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43329 if (X86_FILE_START_FLTUSED)
43330 fputs ("\t.global\t__fltused\n", asm_out_file);
43331 if (ix86_asm_dialect == ASM_INTEL)
43332 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
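/* Compute the alignment of FIELD, capping double and integer fields at
32 bits on 32-bit targets unless -malign-double is in effect; IAMCU uses
its own alignment rule. */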
43336 x86_field_alignment (tree field, int computed)
43338 machine_mode mode;
43339 tree type = TREE_TYPE (field);
43341 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43342 return computed;
43343 if (TARGET_IAMCU)
43344 return iamcu_alignment (type, computed);
43345 mode = TYPE_MODE (strip_array_types (type));
43346 if (mode == DFmode || mode == DCmode
43347 || GET_MODE_CLASS (mode) == MODE_INT
43348 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43349 return MIN (32, computed);
43350 return computed;
43353 /* Print call to TARGET to FILE. */
43355 static void
43356 x86_print_call_or_nop (FILE *file, const char *target)
43358 if (flag_nop_mcount)
43359 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
43360 else
43361 fprintf (file, "1:\tcall\t%s\n", target);
43364 /* Output assembler code to FILE to increment profiler label # LABELNO
43365 for profiling a function entry. */
43366 void
43367 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43369 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43370 : MCOUNT_NAME);
43371 if (TARGET_64BIT)
43373 #ifndef NO_PROFILE_COUNTERS
43374 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43375 #endif
43377 if (!TARGET_PECOFF && flag_pic)
43378 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43379 else
43380 x86_print_call_or_nop (file, mcount_name);
43382 else if (flag_pic)
43384 #ifndef NO_PROFILE_COUNTERS
43385 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43386 LPREFIX, labelno);
43387 #endif
43388 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43390 else
43392 #ifndef NO_PROFILE_COUNTERS
43393 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43394 LPREFIX, labelno);
43395 #endif
43396 x86_print_call_or_nop (file, mcount_name);
43399 if (flag_record_mcount)
43401 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43402 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43403 fprintf (file, "\t.previous\n");
43407 /* We don't have exact information about the insn sizes, but we may assume
43408 quite safely that we are informed about all 1 byte insns and memory
43409 address sizes. This is enough to eliminate unnecessary padding in
43410 99% of cases. */
43412 static int
43413 min_insn_size (rtx_insn *insn)
43415 int l = 0, len;
43417 if (!INSN_P (insn) || !active_insn_p (insn))
43418 return 0;
43420 /* Discard alignments we've emitted and jump instructions. */
43421 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43422 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43423 return 0;
43425 /* Important case - calls are always 5 bytes.
43426 It is common to have many calls in a row. */
43427 if (CALL_P (insn)
43428 && symbolic_reference_mentioned_p (PATTERN (insn))
43429 && !SIBLING_CALL_P (insn))
43430 return 5;
43431 len = get_attr_length (insn);
43432 if (len <= 1)
43433 return 1;
43435 /* For normal instructions we rely on get_attr_length being exact,
43436 with a few exceptions. */
43437 if (!JUMP_P (insn))
43439 enum attr_type type = get_attr_type (insn);
43441 switch (type)
43443 case TYPE_MULTI:
43444 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43445 || asm_noperands (PATTERN (insn)) >= 0)
43446 return 0;
43447 break;
43448 case TYPE_OTHER:
43449 case TYPE_FCMP:
43450 break;
43451 default:
43452 /* Otherwise trust get_attr_length. */
43453 return len;
43456 l = get_attr_length_address (insn);
43457 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43458 l = 4;
43460 if (l)
43461 return 1+l;
43462 else
43463 return 2;
43466 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43468 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
43469 window. */
43471 static void
43472 ix86_avoid_jump_mispredicts (void)
43474 rtx_insn *insn, *start = get_insns ();
43475 int nbytes = 0, njumps = 0;
43476 bool isjump = false;
43478 /* Look for all minimal intervals of instructions containing 4 jumps.
43479 The intervals are bounded by START and INSN. NBYTES is the total
43480 size of instructions in the interval including INSN and not including
43481 START. When the NBYTES is smaller than 16 bytes, it is possible
43482 that the end of START and INSN ends up in the same 16byte page.
43484 The smallest offset in the page INSN can start is the case where START
43485 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
43486 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
43488 Don't consider asm goto as jump, while it can contain a jump, it doesn't
43489 have to, control transfer to label(s) can be performed through other
43490 means, and also we estimate minimum length of all asm stmts as 0. */
43491 for (insn = start; insn; insn = NEXT_INSN (insn))
43493 int min_size;
43495 if (LABEL_P (insn))
43497 int align = label_to_alignment (insn);
43498 int max_skip = label_to_max_skip (insn);
43500 if (max_skip > 15)
43501 max_skip = 15;
43502 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43503 already in the current 16 byte page, because otherwise
43504 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43505 bytes to reach 16 byte boundary. */
43506 if (align <= 0
43507 || (align <= 3 && max_skip != (1 << align) - 1))
43508 max_skip = 0;
43509 if (dump_file)
43510 fprintf (dump_file, "Label %i with max_skip %i\n",
43511 INSN_UID (insn), max_skip);
43512 if (max_skip)
43514 while (nbytes + max_skip >= 16)
43516 start = NEXT_INSN (start);
43517 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43518 || CALL_P (start))
43519 njumps--, isjump = true;
43520 else
43521 isjump = false;
43522 nbytes -= min_insn_size (start);
43525 continue;
43528 min_size = min_insn_size (insn);
43529 nbytes += min_size;
43530 if (dump_file)
43531 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43532 INSN_UID (insn), min_size);
43533 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43534 || CALL_P (insn))
43535 njumps++;
43536 else
43537 continue;
43539 while (njumps > 3)
43541 start = NEXT_INSN (start);
43542 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43543 || CALL_P (start))
43544 njumps--, isjump = true;
43545 else
43546 isjump = false;
43547 nbytes -= min_insn_size (start);
43549 gcc_assert (njumps >= 0);
43550 if (dump_file)
43551 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43552 INSN_UID (start), INSN_UID (insn), nbytes);
43554 if (njumps == 3 && isjump && nbytes < 16)
43556 int padsize = 15 - nbytes + min_insn_size (insn);
43558 if (dump_file)
43559 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43560 INSN_UID (insn), padsize);
43561 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43565 #endif
43567 /* AMD Athlon works faster
43568 when RET is not the destination of a conditional jump or directly preceded
43569 by another jump instruction. We avoid the penalty by inserting a NOP just
43570 before the RET instructions in such cases. */
43571 static void
43572 ix86_pad_returns (void)
43574 edge e;
43575 edge_iterator ei;
43577 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43579 basic_block bb = e->src;
43580 rtx_insn *ret = BB_END (bb);
43581 rtx_insn *prev;
43582 bool replace = false;
43584 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43585 || optimize_bb_for_size_p (bb))
43586 continue;
43587 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43588 if (active_insn_p (prev) || LABEL_P (prev))
43589 break;
43590 if (prev && LABEL_P (prev))
43592 edge e;
43593 edge_iterator ei;
43595 FOR_EACH_EDGE (e, ei, bb->preds)
43596 if (EDGE_FREQUENCY (e) && e->src->index >= 0
43597 && !(e->flags & EDGE_FALLTHRU))
43599 replace = true;
43600 break;
43603 if (!replace)
43605 prev = prev_active_insn (ret);
43606 if (prev
43607 && ((JUMP_P (prev) && any_condjump_p (prev))
43608 || CALL_P (prev)))
43609 replace = true;
43610 /* Empty functions get a branch mispredict even when
43611 the jump destination is not visible to us. */
43612 if (!prev && !optimize_function_for_size_p (cfun))
43613 replace = true;
43615 if (replace)
43617 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43618 delete_insn (ret);
43623 /* Count the minimum number of instructions in BB. Return 4 if the
43624 number of instructions >= 4. */
43626 static int
43627 ix86_count_insn_bb (basic_block bb)
43629 rtx_insn *insn;
43630 int insn_count = 0;
43632 /* Count number of instructions in this block. Return 4 if the number
43633 of instructions >= 4. */
43634 FOR_BB_INSNS (bb, insn)
43636 /* Only happens in exit blocks. */
43637 if (JUMP_P (insn)
43638 && ANY_RETURN_P (PATTERN (insn)))
43639 break;
43641 if (NONDEBUG_INSN_P (insn)
43642 && GET_CODE (PATTERN (insn)) != USE
43643 && GET_CODE (PATTERN (insn)) != CLOBBER)
43645 insn_count++;
43646 if (insn_count >= 4)
43647 return insn_count;
43651 return insn_count;
43655 /* Count the minimum number of instructions in code path in BB.
43656 Return 4 if the number of instructions >= 4. */
43658 static int
43659 ix86_count_insn (basic_block bb)
43661 edge e;
43662 edge_iterator ei;
43663 int min_prev_count;
43665 /* Only bother counting instructions along paths with no
43666 more than 2 basic blocks between entry and exit. Given
43667 that BB has an edge to exit, determine if a predecessor
43668 of BB has an edge from entry. If so, compute the number
43669 of instructions in the predecessor block. If there
43670 happen to be multiple such blocks, compute the minimum. */
43671 min_prev_count = 4;
43672 FOR_EACH_EDGE (e, ei, bb->preds)
43674 edge prev_e;
43675 edge_iterator prev_ei;
43677 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43679 min_prev_count = 0;
43680 break;
43682 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43684 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43686 int count = ix86_count_insn_bb (e->src);
43687 if (count < min_prev_count)
43688 min_prev_count = count;
43689 break;
43694 if (min_prev_count < 4)
43695 min_prev_count += ix86_count_insn_bb (bb);
43697 return min_prev_count;
43700 /* Pad short function to 4 instructions. */
43702 static void
43703 ix86_pad_short_function (void)
43705 edge e;
43706 edge_iterator ei;
43708 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43710 rtx_insn *ret = BB_END (e->src);
43711 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43713 int insn_count = ix86_count_insn (e->src);
43715 /* Pad short function. */
43716 if (insn_count < 4)
43718 rtx_insn *insn = ret;
43720 /* Find epilogue. */
43721 while (insn
43722 && (!NOTE_P (insn)
43723 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43724 insn = PREV_INSN (insn);
43726 if (!insn)
43727 insn = ret;
43729 /* Two NOPs count as one instruction. */
43730 insn_count = 2 * (4 - insn_count);
43731 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43737 /* Fix up a Windows system unwinder issue. If an EH region falls through into
43738 the epilogue, the Windows system unwinder will apply epilogue logic and
43739 produce incorrect offsets. This can be avoided by adding a nop between
43740 the last insn that can throw and the first insn of the epilogue. */
43742 static void
43743 ix86_seh_fixup_eh_fallthru (void)
43745 edge e;
43746 edge_iterator ei;
43748 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43750 rtx_insn *insn, *next;
43752 /* Find the beginning of the epilogue. */
43753 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43754 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43755 break;
43756 if (insn == NULL)
43757 continue;
43759 /* We only care about preceding insns that can throw. */
43760 insn = prev_active_insn (insn);
43761 if (insn == NULL || !can_throw_internal (insn))
43762 continue;
43764 /* Do not separate calls from their debug information. */
43765 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43766 if (NOTE_P (next)
43767 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43768 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43769 insn = next;
43770 else
43771 break;
43773 emit_insn_after (gen_nops (const1_rtx), insn);
43777 /* Implement machine specific optimizations. We implement padding of returns
43778 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
43779 static void
43780 ix86_reorg (void)
43782 /* We are freeing block_for_insn in the toplev to keep compatibility
43783 with old MDEP_REORGS that are not CFG based. Recompute it now. */
43784 compute_bb_for_insn ();
43786 if (TARGET_SEH && current_function_has_exception_handlers ())
43787 ix86_seh_fixup_eh_fallthru ();
43789 if (optimize && optimize_function_for_speed_p (cfun))
43791 if (TARGET_PAD_SHORT_FUNCTION)
43792 ix86_pad_short_function ();
43793 else if (TARGET_PAD_RETURNS)
43794 ix86_pad_returns ();
43795 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
43796 if (TARGET_FOUR_JUMP_LIMIT)
43797 ix86_avoid_jump_mispredicts ();
43798 #endif
43802 /* Return nonzero when QImode register that must be represented via REX prefix
43803 is used. */
43804 bool
43805 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43807 int i;
43808 extract_insn_cached (insn);
43809 for (i = 0; i < recog_data.n_operands; i++)
43810 if (GENERAL_REG_P (recog_data.operand[i])
43811 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
43812 return true;
43813 return false;
43816 /* Return true when INSN mentions register that must be encoded using REX
43817 prefix. */
43818 bool
43819 x86_extended_reg_mentioned_p (rtx insn)
43821 subrtx_iterator::array_type array;
43822 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43824 const_rtx x = *iter;
43825 if (REG_P (x)
43826 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43827 return true;
43829 return false;
43832 /* If profitable, negate (without causing overflow) integer constant
43833 of mode MODE at location LOC. Return true in this case. */
43834 bool
43835 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43837 HOST_WIDE_INT val;
43839 if (!CONST_INT_P (*loc))
43840 return false;
43842 switch (mode)
43844 case DImode:
43845 /* DImode x86_64 constants must fit in 32 bits. */
43846 gcc_assert (x86_64_immediate_operand (*loc, mode));
43848 mode = SImode;
43849 break;
43851 case SImode:
43852 case HImode:
43853 case QImode:
43854 break;
43856 default:
43857 gcc_unreachable ();
43860 /* Avoid overflows. */
43861 if (mode_signbit_p (mode, *loc))
43862 return false;
43864 val = INTVAL (*loc);
43866 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43867 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
43868 if ((val < 0 && val != -128)
43869 || val == 128)
43871 *loc = GEN_INT (-val);
43872 return true;
43875 return false;
43878 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
43879 optabs would emit if we didn't have TFmode patterns. */
43881 void
43882 x86_emit_floatuns (rtx operands[2])
43884 rtx_code_label *neglab, *donelab;
43885 rtx i0, i1, f0, in, out;
43886 machine_mode mode, inmode;
43888 inmode = GET_MODE (operands[1]);
43889 gcc_assert (inmode == SImode || inmode == DImode);
43891 out = operands[0];
43892 in = force_reg (inmode, operands[1]);
43893 mode = GET_MODE (out);
43894 neglab = gen_label_rtx ();
43895 donelab = gen_label_rtx ();
43896 f0 = gen_reg_rtx (mode);
43898 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43900 expand_float (out, in, 0);
43902 emit_jump_insn (gen_jump (donelab));
43903 emit_barrier ();
43905 emit_label (neglab);
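/* Negative path: the input has its sign bit set, so halve it while
folding the discarded low bit back in ((in >> 1) | (in & 1)), convert
that to FP, and double the result to recover the original value. */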
43907 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43908 1, OPTAB_DIRECT);
43909 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43910 1, OPTAB_DIRECT);
43911 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43913 expand_float (f0, i0, 0);
43915 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
43917 emit_label (donelab);
43920 static bool canonicalize_perm (struct expand_vec_perm_d *d);
43921 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43922 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43923 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43925 /* Get a vector mode of the same size as the original but with elements
43926 twice as wide. This is only guaranteed to apply to integral vectors. */
43928 static inline machine_mode
43929 get_mode_wider_vector (machine_mode o)
43931 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
43932 machine_mode n = GET_MODE_WIDER_MODE (o);
43933 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43934 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43935 return n;
43938 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
43939 fill target with val via vec_duplicate. */
43941 static bool
43942 ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43944 bool ok;
43945 rtx_insn *insn;
43946 rtx dup;
43948 /* First attempt to recognize VAL as-is. */
43949 dup = gen_rtx_VEC_DUPLICATE (mode, val);
43950 insn = emit_insn (gen_rtx_SET (target, dup));
43951 if (recog_memoized (insn) < 0)
43953 rtx_insn *seq;
43954 /* If that fails, force VAL into a register. */
43956 start_sequence ();
43957 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43958 seq = get_insns ();
43959 end_sequence ();
43960 if (seq)
43961 emit_insn_before (seq, insn);
43963 ok = recog_memoized (insn) >= 0;
43964 gcc_assert (ok);
43966 return true;
43969 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
43970 with all elements equal to VAR. Return true if successful. */
43972 static bool
43973 ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
43974 rtx target, rtx val)
43976 bool ok;
43978 switch (mode)
43980 case V2SImode:
43981 case V2SFmode:
43982 if (!mmx_ok)
43983 return false;
43984 /* FALLTHRU */
43986 case V4DFmode:
43987 case V4DImode:
43988 case V8SFmode:
43989 case V8SImode:
43990 case V2DFmode:
43991 case V2DImode:
43992 case V4SFmode:
43993 case V4SImode:
43994 case V16SImode:
43995 case V8DImode:
43996 case V16SFmode:
43997 case V8DFmode:
43998 return ix86_vector_duplicate_value (mode, target, val);
44000 case V4HImode:
44001 if (!mmx_ok)
44002 return false;
44003 if (TARGET_SSE || TARGET_3DNOW_A)
44005 rtx x;
44007 val = gen_lowpart (SImode, val);
44008 x = gen_rtx_TRUNCATE (HImode, val);
44009 x = gen_rtx_VEC_DUPLICATE (mode, x);
44010 emit_insn (gen_rtx_SET (target, x));
44011 return true;
44013 goto widen;
44015 case V8QImode:
44016 if (!mmx_ok)
44017 return false;
44018 goto widen;
44020 case V8HImode:
44021 if (TARGET_AVX2)
44022 return ix86_vector_duplicate_value (mode, target, val);
44024 if (TARGET_SSE2)
44026 struct expand_vec_perm_d dperm;
44027 rtx tmp1, tmp2;
44029 permute:
44030 memset (&dperm, 0, sizeof (dperm));
44031 dperm.target = target;
44032 dperm.vmode = mode;
44033 dperm.nelt = GET_MODE_NUNITS (mode);
44034 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
44035 dperm.one_operand_p = true;
44037 /* Extend to SImode using a paradoxical SUBREG. */
44038 tmp1 = gen_reg_rtx (SImode);
44039 emit_move_insn (tmp1, gen_lowpart (SImode, val));
44041 /* Insert the SImode value as low element of a V4SImode vector. */
44042 tmp2 = gen_reg_rtx (V4SImode);
44043 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
44044 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
44046 ok = (expand_vec_perm_1 (&dperm)
44047 || expand_vec_perm_broadcast_1 (&dperm));
44048 gcc_assert (ok);
44049 return ok;
44051 goto widen;
44053 case V16QImode:
44054 if (TARGET_AVX2)
44055 return ix86_vector_duplicate_value (mode, target, val);
44057 if (TARGET_SSE2)
44058 goto permute;
44059 goto widen;
44061 widen:
44062 /* Replicate the value once into the next wider mode and recurse. */
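      /* For example, broadcasting a QImode value X into V8QImode first
	 forms the HImode value (X << 8) | X, broadcasts that into
	 V4HImode, and then reinterprets the result in the original
	 V8QImode.  */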
44064 machine_mode smode, wsmode, wvmode;
44065 rtx x;
44067 smode = GET_MODE_INNER (mode);
44068 wvmode = get_mode_wider_vector (mode);
44069 wsmode = GET_MODE_INNER (wvmode);
44071 val = convert_modes (wsmode, smode, val, true);
44072 x = expand_simple_binop (wsmode, ASHIFT, val,
44073 GEN_INT (GET_MODE_BITSIZE (smode)),
44074 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44075 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
44077 x = gen_reg_rtx (wvmode);
44078 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
44079 gcc_assert (ok);
44080 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
44081 return ok;
44084 case V16HImode:
44085 case V32QImode:
44086 if (TARGET_AVX2)
44087 return ix86_vector_duplicate_value (mode, target, val);
44088 else
44090 machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
44091 rtx x = gen_reg_rtx (hvmode);
44093 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44094 gcc_assert (ok);
44096 x = gen_rtx_VEC_CONCAT (mode, x, x);
44097 emit_insn (gen_rtx_SET (target, x));
44099 return true;
44101 case V64QImode:
44102 case V32HImode:
44103 if (TARGET_AVX512BW)
44104 return ix86_vector_duplicate_value (mode, target, val);
44105 else
44107 machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
44108 rtx x = gen_reg_rtx (hvmode);
44110 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44111 gcc_assert (ok);
44113 x = gen_rtx_VEC_CONCAT (mode, x, x);
44114 emit_insn (gen_rtx_SET (target, x));
44116 return true;
44118 default:
44119 return false;
44123 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44124 whose ONE_VAR element is VAR, and other elements are zero. Return true
44125 if successful. */
44127 static bool
44128 ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
44129 rtx target, rtx var, int one_var)
44131 machine_mode vsimode;
44132 rtx new_target;
44133 rtx x, tmp;
44134 bool use_vector_set = false;
44136 switch (mode)
44138 case V2DImode:
44139 /* For SSE4.1, we normally use vector set. But if the second
44140 element is zero and inter-unit moves are OK, we use movq
44141 instead. */
44142 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
44143 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
44144 && one_var == 0));
44145 break;
44146 case V16QImode:
44147 case V4SImode:
44148 case V4SFmode:
44149 use_vector_set = TARGET_SSE4_1;
44150 break;
44151 case V8HImode:
44152 use_vector_set = TARGET_SSE2;
44153 break;
44154 case V4HImode:
44155 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
44156 break;
44157 case V32QImode:
44158 case V16HImode:
44159 case V8SImode:
44160 case V8SFmode:
44161 case V4DFmode:
44162 use_vector_set = TARGET_AVX;
44163 break;
44164 case V4DImode:
44165 /* Use ix86_expand_vector_set in 64bit mode only. */
44166 use_vector_set = TARGET_AVX && TARGET_64BIT;
44167 break;
44168 default:
44169 break;
44172 if (use_vector_set)
44174 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
44175 var = force_reg (GET_MODE_INNER (mode), var);
44176 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44177 return true;
44180 switch (mode)
44182 case V2SFmode:
44183 case V2SImode:
44184 if (!mmx_ok)
44185 return false;
44186 /* FALLTHRU */
44188 case V2DFmode:
44189 case V2DImode:
44190 if (one_var != 0)
44191 return false;
44192 var = force_reg (GET_MODE_INNER (mode), var);
44193 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
44194 emit_insn (gen_rtx_SET (target, x));
44195 return true;
44197 case V4SFmode:
44198 case V4SImode:
44199 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
44200 new_target = gen_reg_rtx (mode);
44201 else
44202 new_target = target;
44203 var = force_reg (GET_MODE_INNER (mode), var);
44204 x = gen_rtx_VEC_DUPLICATE (mode, var);
44205 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
44206 emit_insn (gen_rtx_SET (new_target, x));
44207 if (one_var != 0)
44209 /* We need to shuffle the value to the correct position, so
44210 create a new pseudo to store the intermediate result. */
44212 /* With SSE2, we can use the integer shuffle insns. */
44213 if (mode != V4SFmode && TARGET_SSE2)
44215 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
44216 const1_rtx,
44217 GEN_INT (one_var == 1 ? 0 : 1),
44218 GEN_INT (one_var == 2 ? 0 : 1),
44219 GEN_INT (one_var == 3 ? 0 : 1)));
44220 if (target != new_target)
44221 emit_move_insn (target, new_target);
44222 return true;
44225 /* Otherwise convert the intermediate result to V4SFmode and
44226 use the SSE1 shuffle instructions. */
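	  /* In the shufps pattern the two source operands are
	     concatenated, so selector values 0..3 pick elements of the
	     first source and 4..7 (hence the "+4" below) pick elements
	     of the second.  */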
44227 if (mode != V4SFmode)
44229 tmp = gen_reg_rtx (V4SFmode);
44230 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
44232 else
44233 tmp = new_target;
44235 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44236 const1_rtx,
44237 GEN_INT (one_var == 1 ? 0 : 1),
44238 GEN_INT (one_var == 2 ? 0+4 : 1+4),
44239 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44241 if (mode != V4SFmode)
44242 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44243 else if (tmp != target)
44244 emit_move_insn (target, tmp);
44246 else if (target != new_target)
44247 emit_move_insn (target, new_target);
44248 return true;
44250 case V8HImode:
44251 case V16QImode:
44252 vsimode = V4SImode;
44253 goto widen;
44254 case V4HImode:
44255 case V8QImode:
44256 if (!mmx_ok)
44257 return false;
44258 vsimode = V2SImode;
44259 goto widen;
44260 widen:
44261 if (one_var != 0)
44262 return false;
44264 /* Zero extend the variable element to SImode and recurse. */
44265 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44267 x = gen_reg_rtx (vsimode);
44268 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44269 var, one_var))
44270 gcc_unreachable ();
44272 emit_move_insn (target, gen_lowpart (mode, x));
44273 return true;
44275 default:
44276 return false;
44280 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
44281 consisting of the values in VALS. It is known that all elements
44282 except ONE_VAR are constants. Return true if successful. */
44284 static bool
44285 ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44286 rtx target, rtx vals, int one_var)
44288 rtx var = XVECEXP (vals, 0, one_var);
44289 machine_mode wmode;
44290 rtx const_vec, x;
44292 const_vec = copy_rtx (vals);
44293 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44294 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44296 switch (mode)
44298 case V2DFmode:
44299 case V2DImode:
44300 case V2SFmode:
44301 case V2SImode:
44302 /* For the two element vectors, it's just as easy to use
44303 the general case. */
44304 return false;
44306 case V4DImode:
44307 /* Use ix86_expand_vector_set in 64bit mode only. */
44308 if (!TARGET_64BIT)
44309 return false;
44310 case V4DFmode:
44311 case V8SFmode:
44312 case V8SImode:
44313 case V16HImode:
44314 case V32QImode:
44315 case V4SFmode:
44316 case V4SImode:
44317 case V8HImode:
44318 case V4HImode:
44319 break;
44321 case V16QImode:
44322 if (TARGET_SSE4_1)
44323 break;
44324 wmode = V8HImode;
44325 goto widen;
44326 case V8QImode:
44327 wmode = V4HImode;
44328 goto widen;
44329 widen:
44330 /* There's no way to set one QImode entry easily. Combine
44331 the variable value with its adjacent constant value, and
44332 promote to an HImode set. */
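      /* E.g. for an odd ONE_VAR the variable byte is shifted into the
	 high half of the 16-bit unit ((var << 8) | constant-low-byte)
	 and the combined value is stored with a single HImode
	 vector-set.  */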
44333 x = XVECEXP (vals, 0, one_var ^ 1);
44334 if (one_var & 1)
44336 var = convert_modes (HImode, QImode, var, true);
44337 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44338 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44339 x = GEN_INT (INTVAL (x) & 0xff);
44341 else
44343 var = convert_modes (HImode, QImode, var, true);
44344 x = gen_int_mode (INTVAL (x) << 8, HImode);
44346 if (x != const0_rtx)
44347 var = expand_simple_binop (HImode, IOR, var, x, var,
44348 1, OPTAB_LIB_WIDEN);
44350 x = gen_reg_rtx (wmode);
44351 emit_move_insn (x, gen_lowpart (wmode, const_vec));
44352 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44354 emit_move_insn (target, gen_lowpart (mode, x));
44355 return true;
44357 default:
44358 return false;
44361 emit_move_insn (target, const_vec);
44362 ix86_expand_vector_set (mmx_ok, target, var, one_var);
44363 return true;
44366 /* A subroutine of ix86_expand_vector_init_general. Use vector
44367 concatenate to handle the most general case: all values variable,
44368 and none identical. */
44370 static void
44371 ix86_expand_vector_init_concat (machine_mode mode,
44372 rtx target, rtx *ops, int n)
44374 machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44375 rtx first[16], second[8], third[4];
44376 rtvec v;
44377 int i, j;
44379 switch (n)
44381 case 2:
44382 switch (mode)
44384 case V16SImode:
44385 cmode = V8SImode;
44386 break;
44387 case V16SFmode:
44388 cmode = V8SFmode;
44389 break;
44390 case V8DImode:
44391 cmode = V4DImode;
44392 break;
44393 case V8DFmode:
44394 cmode = V4DFmode;
44395 break;
44396 case V8SImode:
44397 cmode = V4SImode;
44398 break;
44399 case V8SFmode:
44400 cmode = V4SFmode;
44401 break;
44402 case V4DImode:
44403 cmode = V2DImode;
44404 break;
44405 case V4DFmode:
44406 cmode = V2DFmode;
44407 break;
44408 case V4SImode:
44409 cmode = V2SImode;
44410 break;
44411 case V4SFmode:
44412 cmode = V2SFmode;
44413 break;
44414 case V2DImode:
44415 cmode = DImode;
44416 break;
44417 case V2SImode:
44418 cmode = SImode;
44419 break;
44420 case V2DFmode:
44421 cmode = DFmode;
44422 break;
44423 case V2SFmode:
44424 cmode = SFmode;
44425 break;
44426 default:
44427 gcc_unreachable ();
44430 if (!register_operand (ops[1], cmode))
44431 ops[1] = force_reg (cmode, ops[1]);
44432 if (!register_operand (ops[0], cmode))
44433 ops[0] = force_reg (cmode, ops[0]);
44434 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0],
44435 ops[1])));
44436 break;
44438 case 4:
44439 switch (mode)
44441 case V4DImode:
44442 cmode = V2DImode;
44443 break;
44444 case V4DFmode:
44445 cmode = V2DFmode;
44446 break;
44447 case V4SImode:
44448 cmode = V2SImode;
44449 break;
44450 case V4SFmode:
44451 cmode = V2SFmode;
44452 break;
44453 default:
44454 gcc_unreachable ();
44456 goto half;
44458 case 8:
44459 switch (mode)
44461 case V8DImode:
44462 cmode = V2DImode;
44463 hmode = V4DImode;
44464 break;
44465 case V8DFmode:
44466 cmode = V2DFmode;
44467 hmode = V4DFmode;
44468 break;
44469 case V8SImode:
44470 cmode = V2SImode;
44471 hmode = V4SImode;
44472 break;
44473 case V8SFmode:
44474 cmode = V2SFmode;
44475 hmode = V4SFmode;
44476 break;
44477 default:
44478 gcc_unreachable ();
44480 goto half;
44482 case 16:
44483 switch (mode)
44485 case V16SImode:
44486 cmode = V2SImode;
44487 hmode = V4SImode;
44488 gmode = V8SImode;
44489 break;
44490 case V16SFmode:
44491 cmode = V2SFmode;
44492 hmode = V4SFmode;
44493 gmode = V8SFmode;
44494 break;
44495 default:
44496 gcc_unreachable ();
44498 goto half;
44500 half:
44501 /* FIXME: We process inputs backward to help RA. PR 36222. */
44502 i = n - 1;
44503 j = (n >> 1) - 1;
44504 for (; i > 0; i -= 2, j--)
44506 first[j] = gen_reg_rtx (cmode);
44507 v = gen_rtvec (2, ops[i - 1], ops[i]);
44508 ix86_expand_vector_init (false, first[j],
44509 gen_rtx_PARALLEL (cmode, v));
44512 n >>= 1;
44513 if (n > 4)
44515 gcc_assert (hmode != VOIDmode);
44516 gcc_assert (gmode != VOIDmode);
44517 for (i = j = 0; i < n; i += 2, j++)
44519 second[j] = gen_reg_rtx (hmode);
44520 ix86_expand_vector_init_concat (hmode, second [j],
44521 &first [i], 2);
44523 n >>= 1;
44524 for (i = j = 0; i < n; i += 2, j++)
44526 third[j] = gen_reg_rtx (gmode);
44527 ix86_expand_vector_init_concat (gmode, third[j],
44528 &second[i], 2);
44530 n >>= 1;
44531 ix86_expand_vector_init_concat (mode, target, third, n);
44533 else if (n > 2)
44535 gcc_assert (hmode != VOIDmode);
44536 for (i = j = 0; i < n; i += 2, j++)
44538 second[j] = gen_reg_rtx (hmode);
44539 ix86_expand_vector_init_concat (hmode, second [j],
44540 &first [i], 2);
44542 n >>= 1;
44543 ix86_expand_vector_init_concat (mode, target, second, n);
44545 else
44546 ix86_expand_vector_init_concat (mode, target, first, n);
44547 break;
44549 default:
44550 gcc_unreachable ();
44554 /* A subroutine of ix86_expand_vector_init_general. Use vector
44555 interleave to handle the most general case: all values variable,
44556 and none identical. */
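/* The strategy: load each pair of scalar elements into a vector (one
   element via a scalar move into lane 0, the other via a vector-set
   into lane 1), then repeatedly interleave the low halves of adjacent
   vectors in progressively wider integer modes until a single
   full-width vector remains.  */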
44558 static void
44559 ix86_expand_vector_init_interleave (machine_mode mode,
44560 rtx target, rtx *ops, int n)
44562 machine_mode first_imode, second_imode, third_imode, inner_mode;
44563 int i, j;
44564 rtx op0, op1;
44565 rtx (*gen_load_even) (rtx, rtx, rtx);
44566 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44567 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44569 switch (mode)
44571 case V8HImode:
44572 gen_load_even = gen_vec_setv8hi;
44573 gen_interleave_first_low = gen_vec_interleave_lowv4si;
44574 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44575 inner_mode = HImode;
44576 first_imode = V4SImode;
44577 second_imode = V2DImode;
44578 third_imode = VOIDmode;
44579 break;
44580 case V16QImode:
44581 gen_load_even = gen_vec_setv16qi;
44582 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44583 gen_interleave_second_low = gen_vec_interleave_lowv4si;
44584 inner_mode = QImode;
44585 first_imode = V8HImode;
44586 second_imode = V4SImode;
44587 third_imode = V2DImode;
44588 break;
44589 default:
44590 gcc_unreachable ();
44593 for (i = 0; i < n; i++)
44595 /* Extend the odd element to SImode using a paradoxical SUBREG. */
44596 op0 = gen_reg_rtx (SImode);
44597 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44599 /* Insert the SImode value as low element of V4SImode vector. */
44600 op1 = gen_reg_rtx (V4SImode);
44601 op0 = gen_rtx_VEC_MERGE (V4SImode,
44602 gen_rtx_VEC_DUPLICATE (V4SImode,
44603 op0),
44604 CONST0_RTX (V4SImode),
44605 const1_rtx);
44606 emit_insn (gen_rtx_SET (op1, op0));
44608 /* Cast the V4SImode vector back to a vector in the original mode. */
44609 op0 = gen_reg_rtx (mode);
44610 emit_move_insn (op0, gen_lowpart (mode, op1));
44612 /* Load even elements into the second position. */
44613 emit_insn (gen_load_even (op0,
44614 force_reg (inner_mode,
44615 ops [i + i + 1]),
44616 const1_rtx));
44618 /* Cast vector to FIRST_IMODE vector. */
44619 ops[i] = gen_reg_rtx (first_imode);
44620 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44623 /* Interleave low FIRST_IMODE vectors. */
44624 for (i = j = 0; i < n; i += 2, j++)
44626 op0 = gen_reg_rtx (first_imode);
44627 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44629 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
44630 ops[j] = gen_reg_rtx (second_imode);
44631 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44634 /* Interleave low SECOND_IMODE vectors. */
44635 switch (second_imode)
44637 case V4SImode:
44638 for (i = j = 0; i < n / 2; i += 2, j++)
44640 op0 = gen_reg_rtx (second_imode);
44641 emit_insn (gen_interleave_second_low (op0, ops[i],
44642 ops[i + 1]));
44644 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44645 vector. */
44646 ops[j] = gen_reg_rtx (third_imode);
44647 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44649 second_imode = V2DImode;
44650 gen_interleave_second_low = gen_vec_interleave_lowv2di;
44651 /* FALLTHRU */
44653 case V2DImode:
44654 op0 = gen_reg_rtx (second_imode);
44655 emit_insn (gen_interleave_second_low (op0, ops[0],
44656 ops[1]));
44658 /* Cast the SECOND_IMODE vector back to a vector on original
44659 mode. */
44660 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0)));
44661 break;
44663 default:
44664 gcc_unreachable ();
44668 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
44669 all values variable, and none identical. */
44671 static void
44672 ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44673 rtx target, rtx vals)
44675 rtx ops[64], op0, op1, op2, op3, op4, op5;
44676 machine_mode half_mode = VOIDmode;
44677 machine_mode quarter_mode = VOIDmode;
44678 int n, i;
44680 switch (mode)
44682 case V2SFmode:
44683 case V2SImode:
44684 if (!mmx_ok && !TARGET_SSE)
44685 break;
44686 /* FALLTHRU */
44688 case V16SImode:
44689 case V16SFmode:
44690 case V8DFmode:
44691 case V8DImode:
44692 case V8SFmode:
44693 case V8SImode:
44694 case V4DFmode:
44695 case V4DImode:
44696 case V4SFmode:
44697 case V4SImode:
44698 case V2DFmode:
44699 case V2DImode:
44700 n = GET_MODE_NUNITS (mode);
44701 for (i = 0; i < n; i++)
44702 ops[i] = XVECEXP (vals, 0, i);
44703 ix86_expand_vector_init_concat (mode, target, ops, n);
44704 return;
44706 case V32QImode:
44707 half_mode = V16QImode;
44708 goto half;
44710 case V16HImode:
44711 half_mode = V8HImode;
44712 goto half;
44714 half:
44715 n = GET_MODE_NUNITS (mode);
44716 for (i = 0; i < n; i++)
44717 ops[i] = XVECEXP (vals, 0, i);
44718 op0 = gen_reg_rtx (half_mode);
44719 op1 = gen_reg_rtx (half_mode);
44720 ix86_expand_vector_init_interleave (half_mode, op0, ops,
44721 n >> 2);
44722 ix86_expand_vector_init_interleave (half_mode, op1,
44723 &ops [n >> 1], n >> 2);
44724 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1)));
44725 return;
44727 case V64QImode:
44728 quarter_mode = V16QImode;
44729 half_mode = V32QImode;
44730 goto quarter;
44732 case V32HImode:
44733 quarter_mode = V8HImode;
44734 half_mode = V16HImode;
44735 goto quarter;
44737 quarter:
44738 n = GET_MODE_NUNITS (mode);
44739 for (i = 0; i < n; i++)
44740 ops[i] = XVECEXP (vals, 0, i);
44741 op0 = gen_reg_rtx (quarter_mode);
44742 op1 = gen_reg_rtx (quarter_mode);
44743 op2 = gen_reg_rtx (quarter_mode);
44744 op3 = gen_reg_rtx (quarter_mode);
44745 op4 = gen_reg_rtx (half_mode);
44746 op5 = gen_reg_rtx (half_mode);
44747 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44748 n >> 3);
44749 ix86_expand_vector_init_interleave (quarter_mode, op1,
44750 &ops [n >> 2], n >> 3);
44751 ix86_expand_vector_init_interleave (quarter_mode, op2,
44752 &ops [n >> 1], n >> 3);
44753 ix86_expand_vector_init_interleave (quarter_mode, op3,
44754 &ops [(n >> 1) | (n >> 2)], n >> 3);
44755 emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44756 emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44757 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5)));
44758 return;
44760 case V16QImode:
44761 if (!TARGET_SSE4_1)
44762 break;
44763 /* FALLTHRU */
44765 case V8HImode:
44766 if (!TARGET_SSE2)
44767 break;
44769 /* Don't use ix86_expand_vector_init_interleave if we can't
44770 move from GPR to SSE register directly. */
44771 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44772 break;
44774 n = GET_MODE_NUNITS (mode);
44775 for (i = 0; i < n; i++)
44776 ops[i] = XVECEXP (vals, 0, i);
44777 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44778 return;
44780 case V4HImode:
44781 case V8QImode:
44782 break;
44784 default:
44785 gcc_unreachable ();
44789 int i, j, n_elts, n_words, n_elt_per_word;
44790 machine_mode inner_mode;
44791 rtx words[4], shift;
44793 inner_mode = GET_MODE_INNER (mode);
44794 n_elts = GET_MODE_NUNITS (mode);
44795 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44796 n_elt_per_word = n_elts / n_words;
44797 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
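  /* Pack each word-sized group of elements into an integer word by
     shifting the accumulated word left by one element and OR-ing in
     the next element, walking the group from its last element down to
     its first so that the first element ends up in the low bits.  */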
44799 for (i = 0; i < n_words; ++i)
44801 rtx word = NULL_RTX;
44803 for (j = 0; j < n_elt_per_word; ++j)
44805 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44806 elt = convert_modes (word_mode, inner_mode, elt, true);
44808 if (j == 0)
44809 word = elt;
44810 else
44812 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44813 word, 1, OPTAB_LIB_WIDEN);
44814 word = expand_simple_binop (word_mode, IOR, word, elt,
44815 word, 1, OPTAB_LIB_WIDEN);
44819 words[i] = word;
44822 if (n_words == 1)
44823 emit_move_insn (target, gen_lowpart (mode, words[0]));
44824 else if (n_words == 2)
44826 rtx tmp = gen_reg_rtx (mode);
44827 emit_clobber (tmp);
44828 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44829 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44830 emit_move_insn (target, tmp);
44832 else if (n_words == 4)
44834 rtx tmp = gen_reg_rtx (V4SImode);
44835 gcc_assert (word_mode == SImode);
44836 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44837 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44838 emit_move_insn (target, gen_lowpart (mode, tmp));
44840 else
44841 gcc_unreachable ();
44845 /* Initialize vector TARGET via VALS. Suppress the use of MMX
44846 instructions unless MMX_OK is true. */
44848 void
44849 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44851 machine_mode mode = GET_MODE (target);
44852 machine_mode inner_mode = GET_MODE_INNER (mode);
44853 int n_elts = GET_MODE_NUNITS (mode);
44854 int n_var = 0, one_var = -1;
44855 bool all_same = true, all_const_zero = true;
44856 int i;
44857 rtx x;
44859 for (i = 0; i < n_elts; ++i)
44861 x = XVECEXP (vals, 0, i);
44862 if (!(CONST_SCALAR_INT_P (x)
44863 || CONST_DOUBLE_P (x)
44864 || CONST_FIXED_P (x)))
44865 n_var++, one_var = i;
44866 else if (x != CONST0_RTX (inner_mode))
44867 all_const_zero = false;
44868 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44869 all_same = false;
44872 /* Constants are best loaded from the constant pool. */
44873 if (n_var == 0)
44875 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44876 return;
44879 /* If all values are identical, broadcast the value. */
44880 if (all_same
44881 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44882 XVECEXP (vals, 0, 0)))
44883 return;
44885 /* Values where only one field is non-constant are best loaded from
44886 the pool and overwritten via move later. */
44887 if (n_var == 1)
44889 if (all_const_zero
44890 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44891 XVECEXP (vals, 0, one_var),
44892 one_var))
44893 return;
44895 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44896 return;
44899 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44902 void
44903 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44905 machine_mode mode = GET_MODE (target);
44906 machine_mode inner_mode = GET_MODE_INNER (mode);
44907 machine_mode half_mode;
44908 bool use_vec_merge = false;
44909 rtx tmp;
44910 static rtx (*gen_extract[6][2]) (rtx, rtx)
44912 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44913 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44914 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44915 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44916 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44917 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44919 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44921 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44922 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44923 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44924 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44925 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44926 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44928 int i, j, n;
44929 machine_mode mmode = VOIDmode;
44930 rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
44932 switch (mode)
44934 case V2SFmode:
44935 case V2SImode:
44936 if (mmx_ok)
44938 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44939 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44940 if (elt == 0)
44941 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44942 else
44943 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44944 emit_insn (gen_rtx_SET (target, tmp));
44945 return;
44947 break;
44949 case V2DImode:
44950 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44951 if (use_vec_merge)
44952 break;
44954 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44955 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44956 if (elt == 0)
44957 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44958 else
44959 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44960 emit_insn (gen_rtx_SET (target, tmp));
44961 return;
44963 case V2DFmode:
44965 rtx op0, op1;
44967 /* For the two element vectors, we implement a VEC_CONCAT with
44968 the extraction of the other element. */
44970 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
44971 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
44973 if (elt == 0)
44974 op0 = val, op1 = tmp;
44975 else
44976 op0 = tmp, op1 = val;
44978 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
44979 emit_insn (gen_rtx_SET (target, tmp));
44981 return;
44983 case V4SFmode:
44984 use_vec_merge = TARGET_SSE4_1;
44985 if (use_vec_merge)
44986 break;
44988 switch (elt)
44990 case 0:
44991 use_vec_merge = true;
44992 break;
44994 case 1:
44995 /* tmp = target = A B C D */
44996 tmp = copy_to_reg (target);
44997 /* target = A A B B */
44998 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
44999 /* target = X A B B */
45000 ix86_expand_vector_set (false, target, val, 0);
45001 /* target = A X C D */
45002 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45003 const1_rtx, const0_rtx,
45004 GEN_INT (2+4), GEN_INT (3+4)));
45005 return;
45007 case 2:
45008 /* tmp = target = A B C D */
45009 tmp = copy_to_reg (target);
45010 /* tmp = X B C D */
45011 ix86_expand_vector_set (false, tmp, val, 0);
45012 /* target = A B X D */
45013 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45014 const0_rtx, const1_rtx,
45015 GEN_INT (0+4), GEN_INT (3+4)));
45016 return;
45018 case 3:
45019 /* tmp = target = A B C D */
45020 tmp = copy_to_reg (target);
45021 /* tmp = X B C D */
45022 ix86_expand_vector_set (false, tmp, val, 0);
45023 /* target = A B C X */
45024 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45025 const0_rtx, const1_rtx,
45026 GEN_INT (2+4), GEN_INT (0+4)));
45027 return;
45029 default:
45030 gcc_unreachable ();
45032 break;
45034 case V4SImode:
45035 use_vec_merge = TARGET_SSE4_1;
45036 if (use_vec_merge)
45037 break;
45039 /* Element 0 handled by vec_merge below. */
45040 if (elt == 0)
45042 use_vec_merge = true;
45043 break;
45046 if (TARGET_SSE2)
45048 /* With SSE2, use integer shuffles to swap element 0 and ELT,
45049 store into element 0, then shuffle them back. */
45051 rtx order[4];
45053 order[0] = GEN_INT (elt);
45054 order[1] = const1_rtx;
45055 order[2] = const2_rtx;
45056 order[3] = GEN_INT (3);
45057 order[elt] = const0_rtx;
45059 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45060 order[1], order[2], order[3]));
45062 ix86_expand_vector_set (false, target, val, 0);
45064 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45065 order[1], order[2], order[3]));
45067 else
45069 /* For SSE1, we have to reuse the V4SF code. */
45070 rtx t = gen_reg_rtx (V4SFmode);
45071 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
45072 emit_move_insn (target, gen_lowpart (mode, t));
45074 return;
45076 case V8HImode:
45077 use_vec_merge = TARGET_SSE2;
45078 break;
45079 case V4HImode:
45080 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45081 break;
45083 case V16QImode:
45084 use_vec_merge = TARGET_SSE4_1;
45085 break;
45087 case V8QImode:
45088 break;
45090 case V32QImode:
45091 half_mode = V16QImode;
45092 j = 0;
45093 n = 16;
45094 goto half;
45096 case V16HImode:
45097 half_mode = V8HImode;
45098 j = 1;
45099 n = 8;
45100 goto half;
45102 case V8SImode:
45103 half_mode = V4SImode;
45104 j = 2;
45105 n = 4;
45106 goto half;
45108 case V4DImode:
45109 half_mode = V2DImode;
45110 j = 3;
45111 n = 2;
45112 goto half;
45114 case V8SFmode:
45115 half_mode = V4SFmode;
45116 j = 4;
45117 n = 4;
45118 goto half;
45120 case V4DFmode:
45121 half_mode = V2DFmode;
45122 j = 5;
45123 n = 2;
45124 goto half;
45126 half:
45127 /* Compute offset. */
45128 i = elt / n;
45129 elt %= n;
45131 gcc_assert (i <= 1);
45133 /* Extract the half. */
45134 tmp = gen_reg_rtx (half_mode);
45135 emit_insn (gen_extract[j][i] (tmp, target));
45137 /* Put val in tmp at elt. */
45138 ix86_expand_vector_set (false, tmp, val, elt);
45140 /* Put it back. */
45141 emit_insn (gen_insert[j][i] (target, target, tmp));
45142 return;
45144 case V8DFmode:
45145 if (TARGET_AVX512F)
45147 mmode = QImode;
45148 gen_blendm = gen_avx512f_blendmv8df;
45150 break;
45152 case V8DImode:
45153 if (TARGET_AVX512F)
45155 mmode = QImode;
45156 gen_blendm = gen_avx512f_blendmv8di;
45158 break;
45160 case V16SFmode:
45161 if (TARGET_AVX512F)
45163 mmode = HImode;
45164 gen_blendm = gen_avx512f_blendmv16sf;
45166 break;
45168 case V16SImode:
45169 if (TARGET_AVX512F)
45171 mmode = HImode;
45172 gen_blendm = gen_avx512f_blendmv16si;
45174 break;
45176 case V32HImode:
45177 if (TARGET_AVX512F && TARGET_AVX512BW)
45179 mmode = SImode;
45180 gen_blendm = gen_avx512bw_blendmv32hi;
45182 break;
45184 case V64QImode:
45185 if (TARGET_AVX512F && TARGET_AVX512BW)
45187 mmode = DImode;
45188 gen_blendm = gen_avx512bw_blendmv64qi;
45190 break;
45192 default:
45193 break;
45196 if (mmode != VOIDmode)
45198 tmp = gen_reg_rtx (mode);
45199 emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val)));
45200 emit_insn (gen_blendm (target, tmp, target,
45201 force_reg (mmode,
45202 gen_int_mode (1 << elt, mmode))));
45204 else if (use_vec_merge)
45206 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45207 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45208 emit_insn (gen_rtx_SET (target, tmp));
45210 else
45212 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45214 emit_move_insn (mem, target);
45216 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45217 emit_move_insn (tmp, val);
45219 emit_move_insn (target, mem);
45223 void
45224 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45226 machine_mode mode = GET_MODE (vec);
45227 machine_mode inner_mode = GET_MODE_INNER (mode);
45228 bool use_vec_extr = false;
45229 rtx tmp;
45231 switch (mode)
45233 case V2SImode:
45234 case V2SFmode:
45235 if (!mmx_ok)
45236 break;
45237 /* FALLTHRU */
45239 case V2DFmode:
45240 case V2DImode:
45241 use_vec_extr = true;
45242 break;
45244 case V4SFmode:
45245 use_vec_extr = TARGET_SSE4_1;
45246 if (use_vec_extr)
45247 break;
45249 switch (elt)
45251 case 0:
45252 tmp = vec;
45253 break;
45255 case 1:
45256 case 3:
45257 tmp = gen_reg_rtx (mode);
45258 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45259 GEN_INT (elt), GEN_INT (elt),
45260 GEN_INT (elt+4), GEN_INT (elt+4)));
45261 break;
45263 case 2:
45264 tmp = gen_reg_rtx (mode);
45265 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45266 break;
45268 default:
45269 gcc_unreachable ();
45271 vec = tmp;
45272 use_vec_extr = true;
45273 elt = 0;
45274 break;
45276 case V4SImode:
45277 use_vec_extr = TARGET_SSE4_1;
45278 if (use_vec_extr)
45279 break;
45281 if (TARGET_SSE2)
45283 switch (elt)
45285 case 0:
45286 tmp = vec;
45287 break;
45289 case 1:
45290 case 3:
45291 tmp = gen_reg_rtx (mode);
45292 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45293 GEN_INT (elt), GEN_INT (elt),
45294 GEN_INT (elt), GEN_INT (elt)));
45295 break;
45297 case 2:
45298 tmp = gen_reg_rtx (mode);
45299 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45300 break;
45302 default:
45303 gcc_unreachable ();
45305 vec = tmp;
45306 use_vec_extr = true;
45307 elt = 0;
45309 else
45311 /* For SSE1, we have to reuse the V4SF code. */
45312 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45313 gen_lowpart (V4SFmode, vec), elt);
45314 return;
45316 break;
45318 case V8HImode:
45319 use_vec_extr = TARGET_SSE2;
45320 break;
45321 case V4HImode:
45322 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45323 break;
45325 case V16QImode:
45326 use_vec_extr = TARGET_SSE4_1;
45327 break;
45329 case V8SFmode:
45330 if (TARGET_AVX)
45332 tmp = gen_reg_rtx (V4SFmode);
45333 if (elt < 4)
45334 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45335 else
45336 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45337 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45338 return;
45340 break;
45342 case V4DFmode:
45343 if (TARGET_AVX)
45345 tmp = gen_reg_rtx (V2DFmode);
45346 if (elt < 2)
45347 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45348 else
45349 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45350 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45351 return;
45353 break;
45355 case V32QImode:
45356 if (TARGET_AVX)
45358 tmp = gen_reg_rtx (V16QImode);
45359 if (elt < 16)
45360 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45361 else
45362 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45363 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45364 return;
45366 break;
45368 case V16HImode:
45369 if (TARGET_AVX)
45371 tmp = gen_reg_rtx (V8HImode);
45372 if (elt < 8)
45373 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45374 else
45375 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45376 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45377 return;
45379 break;
45381 case V8SImode:
45382 if (TARGET_AVX)
45384 tmp = gen_reg_rtx (V4SImode);
45385 if (elt < 4)
45386 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45387 else
45388 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45389 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45390 return;
45392 break;
45394 case V4DImode:
45395 if (TARGET_AVX)
45397 tmp = gen_reg_rtx (V2DImode);
45398 if (elt < 2)
45399 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45400 else
45401 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45402 ix86_expand_vector_extract (false, target, tmp, elt & 1);
45403 return;
45405 break;
45407 case V32HImode:
45408 if (TARGET_AVX512BW)
45410 tmp = gen_reg_rtx (V16HImode);
45411 if (elt < 16)
45412 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45413 else
45414 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45415 ix86_expand_vector_extract (false, target, tmp, elt & 15);
45416 return;
45418 break;
45420 case V64QImode:
45421 if (TARGET_AVX512BW)
45423 tmp = gen_reg_rtx (V32QImode);
45424 if (elt < 32)
45425 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45426 else
45427 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45428 ix86_expand_vector_extract (false, target, tmp, elt & 31);
45429 return;
45431 break;
45433 case V16SFmode:
45434 tmp = gen_reg_rtx (V8SFmode);
45435 if (elt < 8)
45436 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45437 else
45438 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45439 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45440 return;
45442 case V8DFmode:
45443 tmp = gen_reg_rtx (V4DFmode);
45444 if (elt < 4)
45445 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45446 else
45447 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45448 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45449 return;
45451 case V16SImode:
45452 tmp = gen_reg_rtx (V8SImode);
45453 if (elt < 8)
45454 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45455 else
45456 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45457 ix86_expand_vector_extract (false, target, tmp, elt & 7);
45458 return;
45460 case V8DImode:
45461 tmp = gen_reg_rtx (V4DImode);
45462 if (elt < 4)
45463 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45464 else
45465 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45466 ix86_expand_vector_extract (false, target, tmp, elt & 3);
45467 return;
45469 case V8QImode:
45470 /* ??? Could extract the appropriate HImode element and shift. */
45471 default:
45472 break;
45475 if (use_vec_extr)
45477 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45478 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45480 /* Let the rtl optimizers know about the zero extension performed. */
45481 if (inner_mode == QImode || inner_mode == HImode)
45483 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45484 target = gen_lowpart (SImode, target);
45487 emit_insn (gen_rtx_SET (target, tmp));
45489 else
45491 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45493 emit_move_insn (mem, vec);
45495 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45496 emit_move_insn (target, tmp);
45500 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45501 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45502 The upper bits of DEST are undefined, though they shouldn't cause
45503 exceptions (some bits from src or all zeros are ok). */
45505 static void
45506 emit_reduc_half (rtx dest, rtx src, int i)
45508 rtx tem, d = dest;
45509 switch (GET_MODE (src))
45511 case V4SFmode:
45512 if (i == 128)
45513 tem = gen_sse_movhlps (dest, src, src);
45514 else
45515 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45516 GEN_INT (1 + 4), GEN_INT (1 + 4));
45517 break;
45518 case V2DFmode:
45519 tem = gen_vec_interleave_highv2df (dest, src, src);
45520 break;
45521 case V16QImode:
45522 case V8HImode:
45523 case V4SImode:
45524 case V2DImode:
45525 d = gen_reg_rtx (V1TImode);
45526 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45527 GEN_INT (i / 2));
45528 break;
45529 case V8SFmode:
45530 if (i == 256)
45531 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45532 else
45533 tem = gen_avx_shufps256 (dest, src, src,
45534 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45535 break;
45536 case V4DFmode:
45537 if (i == 256)
45538 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45539 else
45540 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45541 break;
45542 case V32QImode:
45543 case V16HImode:
45544 case V8SImode:
45545 case V4DImode:
45546 if (i == 256)
45548 if (GET_MODE (dest) != V4DImode)
45549 d = gen_reg_rtx (V4DImode);
45550 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45551 gen_lowpart (V4DImode, src),
45552 const1_rtx);
45554 else
45556 d = gen_reg_rtx (V2TImode);
45557 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45558 GEN_INT (i / 2));
45560 break;
45561 case V64QImode:
45562 case V32HImode:
45563 case V16SImode:
45564 case V16SFmode:
45565 case V8DImode:
45566 case V8DFmode:
45567 if (i > 128)
45568 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45569 gen_lowpart (V16SImode, src),
45570 gen_lowpart (V16SImode, src),
45571 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45572 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45573 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45574 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45575 GEN_INT (0xC), GEN_INT (0xD),
45576 GEN_INT (0xE), GEN_INT (0xF),
45577 GEN_INT (0x10), GEN_INT (0x11),
45578 GEN_INT (0x12), GEN_INT (0x13),
45579 GEN_INT (0x14), GEN_INT (0x15),
45580 GEN_INT (0x16), GEN_INT (0x17));
45581 else
45582 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45583 gen_lowpart (V16SImode, src),
45584 GEN_INT (i == 128 ? 0x2 : 0x1),
45585 GEN_INT (0x3),
45586 GEN_INT (0x3),
45587 GEN_INT (0x3),
45588 GEN_INT (i == 128 ? 0x6 : 0x5),
45589 GEN_INT (0x7),
45590 GEN_INT (0x7),
45591 GEN_INT (0x7),
45592 GEN_INT (i == 128 ? 0xA : 0x9),
45593 GEN_INT (0xB),
45594 GEN_INT (0xB),
45595 GEN_INT (0xB),
45596 GEN_INT (i == 128 ? 0xE : 0xD),
45597 GEN_INT (0xF),
45598 GEN_INT (0xF),
45599 GEN_INT (0xF));
45600 break;
45601 default:
45602 gcc_unreachable ();
45604 emit_insn (tem);
45605 if (d != dest)
45606 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45609 /* Expand a vector reduction. FN is the binary pattern to reduce;
45610 DEST is the destination; IN is the input vector. */
45612 void
45613 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45615 rtx half, dst, vec = in;
45616 machine_mode mode = GET_MODE (in);
45617 int i;
45619 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
45620 if (TARGET_SSE4_1
45621 && mode == V8HImode
45622 && fn == gen_uminv8hi3)
45624 emit_insn (gen_sse4_1_phminposuw (dest, in));
45625 return;
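  /* Otherwise reduce by repeatedly folding the upper half of the
     vector onto the lower half (emit_reduc_half) and combining the two
     halves with FN, halving the active width each iteration until only
     the low element carries the result.  */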
45628 for (i = GET_MODE_BITSIZE (mode);
45629 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45630 i >>= 1)
45632 half = gen_reg_rtx (mode);
45633 emit_reduc_half (half, vec, i);
45634 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45635 dst = dest;
45636 else
45637 dst = gen_reg_rtx (mode);
45638 emit_insn (fn (dst, half, vec));
45639 vec = dst;
45643 /* Target hook for scalar_mode_supported_p. */
45644 static bool
45645 ix86_scalar_mode_supported_p (machine_mode mode)
45647 if (DECIMAL_FLOAT_MODE_P (mode))
45648 return default_decimal_float_supported_p ();
45649 else if (mode == TFmode)
45650 return true;
45651 else
45652 return default_scalar_mode_supported_p (mode);
45655 /* Implements target hook vector_mode_supported_p. */
45656 static bool
45657 ix86_vector_mode_supported_p (machine_mode mode)
45659 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45660 return true;
45661 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45662 return true;
45663 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45664 return true;
45665 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45666 return true;
45667 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45668 return true;
45669 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45670 return true;
45671 return false;
45674 /* Implement target hook libgcc_floating_mode_supported_p. */
45675 static bool
45676 ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45678 switch (mode)
45680 case SFmode:
45681 case DFmode:
45682 case XFmode:
45683 return true;
45685 case TFmode:
45686 #ifdef IX86_NO_LIBGCC_TFMODE
45687 return false;
45688 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45689 return TARGET_LONG_DOUBLE_128;
45690 #else
45691 return true;
45692 #endif
45694 default:
45695 return false;
45699 /* Target hook for c_mode_for_suffix. */
45700 static machine_mode
45701 ix86_c_mode_for_suffix (char suffix)
45703 if (suffix == 'q')
45704 return TFmode;
45705 if (suffix == 'w')
45706 return XFmode;
45708 return VOIDmode;
45711 /* Worker function for TARGET_MD_ASM_ADJUST.
45713 We implement asm flag outputs, and maintain source compatibility
45714 with the old cc0-based compiler. */
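/* An asm flag output uses a constraint of the form "=@cc<cond>"; for
   example
	int eq;
	asm ("cmpl %2, %1" : "=@ccz" (eq) : "r" (a), "g" (b));
   stores the Z flag produced by the asm directly into EQ, with no
   explicit setcc needed in the template.  */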
45716 static rtx_insn *
45717 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
45718 vec<const char *> &constraints,
45719 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
45721 clobbers.safe_push (gen_rtx_REG (CCFPmode, FPSR_REG));
45722 SET_HARD_REG_BIT (clobbered_regs, FPSR_REG);
45724 bool saw_asm_flag = false;
45726 start_sequence ();
45727 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
45729 const char *con = constraints[i];
45730 if (strncmp (con, "=@cc", 4) != 0)
45731 continue;
45732 con += 4;
45733 if (strchr (con, ',') != NULL)
45735 error ("alternatives not allowed in asm flag output");
45736 continue;
45739 bool invert = false;
45740 if (con[0] == 'n')
45741 invert = true, con++;
45743 machine_mode mode = CCmode;
45744 rtx_code code = UNKNOWN;
45746 switch (con[0])
45748 case 'a':
45749 if (con[1] == 0)
45750 mode = CCAmode, code = EQ;
45751 else if (con[1] == 'e' && con[2] == 0)
45752 mode = CCCmode, code = EQ;
45753 break;
45754 case 'b':
45755 if (con[1] == 0)
45756 mode = CCCmode, code = EQ;
45757 else if (con[1] == 'e' && con[2] == 0)
45758 mode = CCAmode, code = NE;
45759 break;
45760 case 'c':
45761 if (con[1] == 0)
45762 mode = CCCmode, code = EQ;
45763 break;
45764 case 'e':
45765 if (con[1] == 0)
45766 mode = CCZmode, code = EQ;
45767 break;
45768 case 'g':
45769 if (con[1] == 0)
45770 mode = CCGCmode, code = GT;
45771 else if (con[1] == 'e' && con[2] == 0)
45772 mode = CCGCmode, code = GE;
45773 break;
45774 case 'l':
45775 if (con[1] == 0)
45776 mode = CCGCmode, code = LT;
45777 else if (con[1] == 'e' && con[2] == 0)
45778 mode = CCGCmode, code = LE;
45779 break;
45780 case 'o':
45781 if (con[1] == 0)
45782 mode = CCOmode, code = EQ;
45783 break;
45784 case 'p':
45785 if (con[1] == 0)
45786 mode = CCPmode, code = EQ;
45787 break;
45788 case 's':
45789 if (con[1] == 0)
45790 mode = CCSmode, code = EQ;
45791 break;
45792 case 'z':
45793 if (con[1] == 0)
45794 mode = CCZmode, code = EQ;
45795 break;
45797 if (code == UNKNOWN)
45799 error ("unknown asm flag output %qs", constraints[i]);
45800 continue;
45802 if (invert)
45803 code = reverse_condition (code);
45805 rtx dest = outputs[i];
45806 if (!saw_asm_flag)
45808 /* This is the first asm flag output. Here we put the flags
45809 register in as the real output and adjust the condition to
45810 allow it. */
45811 constraints[i] = "=Bf";
45812 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
45813 saw_asm_flag = true;
45815 else
45817 /* We don't need the flags register as output twice. */
45818 constraints[i] = "=X";
45819 outputs[i] = gen_rtx_SCRATCH (SImode);
45822 rtx x = gen_rtx_REG (mode, FLAGS_REG);
45823 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
45825 machine_mode dest_mode = GET_MODE (dest);
45826 if (!SCALAR_INT_MODE_P (dest_mode))
45828 error ("invalid type for asm flag output");
45829 continue;
45832 if (dest_mode == DImode && !TARGET_64BIT)
45833 dest_mode = SImode;
45835 if (dest_mode != QImode)
45837 rtx destqi = gen_reg_rtx (QImode);
45838 emit_insn (gen_rtx_SET (destqi, x));
45840 if (TARGET_ZERO_EXTEND_WITH_AND
45841 && optimize_function_for_speed_p (cfun))
45843 x = force_reg (dest_mode, const0_rtx);
45845 emit_insn (gen_movstrictqi
45846 (gen_lowpart (QImode, x), destqi));
45848 else
45849 x = gen_rtx_ZERO_EXTEND (dest_mode, destqi);
45852 if (dest_mode != GET_MODE (dest))
45854 rtx tmp = gen_reg_rtx (SImode);
45856 emit_insn (gen_rtx_SET (tmp, x));
45857 emit_insn (gen_zero_extendsidi2 (dest, tmp));
45859 else
45860 emit_insn (gen_rtx_SET (dest, x));
45862 rtx_insn *seq = get_insns ();
45863 end_sequence ();
45865 if (saw_asm_flag)
45866 return seq;
45867 else
45869 /* If we had no asm flag outputs, clobber the flags. */
45870 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
45871 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
45872 return NULL;
45876 /* Implements target vector targetm.asm.encode_section_info. */
45878 static void ATTRIBUTE_UNUSED
45879 ix86_encode_section_info (tree decl, rtx rtl, int first)
45881 default_encode_section_info (decl, rtl, first);
45883 if (ix86_in_large_data_p (decl))
45884 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45887 /* Worker function for REVERSE_CONDITION. */
45889 enum rtx_code
45890 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45892 return (mode != CCFPmode && mode != CCFPUmode
45893 ? reverse_condition (code)
45894 : reverse_condition_maybe_unordered (code));
45897 /* Output code to perform an x87 FP register move, from OPERANDS[1]
45898 to OPERANDS[0]. */
45900 const char *
45901 output_387_reg_move (rtx insn, rtx *operands)
45903 if (REG_P (operands[0]))
45905 if (REG_P (operands[1])
45906 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45908 if (REGNO (operands[0]) == FIRST_STACK_REG)
45909 return output_387_ffreep (operands, 0);
45910 return "fstp\t%y0";
45912 if (STACK_TOP_P (operands[0]))
45913 return "fld%Z1\t%y1";
45914 return "fst\t%y0";
45916 else if (MEM_P (operands[0]))
45918 gcc_assert (REG_P (operands[1]));
45919 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45920 return "fstp%Z0\t%y0";
45921 else
45923 /* There is no non-popping store to memory for XFmode.
45924 So if we need one, follow the store with a load. */
45925 if (GET_MODE (operands[0]) == XFmode)
45926 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45927 else
45928 return "fst%Z0\t%y0";
45931 else
45932 gcc_unreachable();
45935 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
45936 the FP status register is set. */
45938 void
45939 ix86_emit_fp_unordered_jump (rtx label)
45941 rtx reg = gen_reg_rtx (HImode);
45942 rtx temp;
45944 emit_insn (gen_x86_fnstsw_1 (reg));
45946 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45948 emit_insn (gen_x86_sahf_1 (reg));
45950 temp = gen_rtx_REG (CCmode, FLAGS_REG);
45951 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45953 else
45955 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45957 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45958 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45961 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45962 gen_rtx_LABEL_REF (VOIDmode, label),
45963 pc_rtx);
45964 temp = gen_rtx_SET (pc_rtx, temp);
45966 emit_jump_insn (temp);
45967 predict_jump (REG_BR_PROB_BASE * 10 / 100);
45970 /* Output code to perform a log1p XFmode calculation. */
45972 void ix86_emit_i387_log1p (rtx op0, rtx op1)
45974 rtx_code_label *label1 = gen_label_rtx ();
45975 rtx_code_label *label2 = gen_label_rtx ();
45977 rtx tmp = gen_reg_rtx (XFmode);
45978 rtx tmp2 = gen_reg_rtx (XFmode);
45979 rtx test;
45981 emit_insn (gen_absxf2 (tmp, op1));
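  /* fyl2xp1 is only accurate for |op1| < 1 - sqrt(2)/2 (~0.2929), so
     the branch below diverts larger inputs to the fyl2x path, which
     computes the result from 1.0 + op1 instead.  */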
45982 test = gen_rtx_GE (VOIDmode, tmp,
45983 CONST_DOUBLE_FROM_REAL_VALUE (
45984 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45985 XFmode));
45986 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45988 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45989 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45990 emit_jump (label2);
45992 emit_label (label1);
45993 emit_move_insn (tmp, CONST1_RTX (XFmode));
45994 emit_insn (gen_addxf3 (tmp, op1, tmp));
45995 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45996 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45998 emit_label (label2);
46001 /* Emit code for round calculation. */
46002 void ix86_emit_i387_round (rtx op0, rtx op1)
46004 machine_mode inmode = GET_MODE (op1);
46005 machine_mode outmode = GET_MODE (op0);
46006 rtx e1, e2, res, tmp, tmp1, half;
46007 rtx scratch = gen_reg_rtx (HImode);
46008 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
46009 rtx_code_label *jump_label = gen_label_rtx ();
46010 rtx insn;
46011 rtx (*gen_abs) (rtx, rtx);
46012 rtx (*gen_neg) (rtx, rtx);
46014 switch (inmode)
46016 case SFmode:
46017 gen_abs = gen_abssf2;
46018 break;
46019 case DFmode:
46020 gen_abs = gen_absdf2;
46021 break;
46022 case XFmode:
46023 gen_abs = gen_absxf2;
46024 break;
46025 default:
46026 gcc_unreachable ();
46029 switch (outmode)
46031 case SFmode:
46032 gen_neg = gen_negsf2;
46033 break;
46034 case DFmode:
46035 gen_neg = gen_negdf2;
46036 break;
46037 case XFmode:
46038 gen_neg = gen_negxf2;
46039 break;
46040 case HImode:
46041 gen_neg = gen_neghi2;
46042 break;
46043 case SImode:
46044 gen_neg = gen_negsi2;
46045 break;
46046 case DImode:
46047 gen_neg = gen_negdi2;
46048 break;
46049 default:
46050 gcc_unreachable ();
46053 e1 = gen_reg_rtx (inmode);
46054 e2 = gen_reg_rtx (inmode);
46055 res = gen_reg_rtx (outmode);
46057 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
46059 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
46061 /* scratch = fxam(op1) */
46062 emit_insn (gen_rtx_SET (scratch,
46063 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
46064 UNSPEC_FXAM)));
46065 /* e1 = fabs(op1) */
46066 emit_insn (gen_abs (e1, op1));
46068 /* e2 = e1 + 0.5 */
46069 half = force_reg (inmode, half);
46070 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (inmode, e1, half)));
46072 /* res = floor(e2) */
46073 if (inmode != XFmode)
46075 tmp1 = gen_reg_rtx (XFmode);
46077 emit_insn (gen_rtx_SET (tmp1, gen_rtx_FLOAT_EXTEND (XFmode, e2)));
46079 else
46080 tmp1 = e2;
46082 switch (outmode)
46084 case SFmode:
46085 case DFmode:
46087 rtx tmp0 = gen_reg_rtx (XFmode);
46089 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
46091 emit_insn (gen_rtx_SET (res,
46092 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
46093 UNSPEC_TRUNC_NOOP)));
46095 break;
46096 case XFmode:
46097 emit_insn (gen_frndintxf2_floor (res, tmp1));
46098 break;
46099 case HImode:
46100 emit_insn (gen_lfloorxfhi2 (res, tmp1));
46101 break;
46102 case SImode:
46103 emit_insn (gen_lfloorxfsi2 (res, tmp1));
46104 break;
46105 case DImode:
46106 emit_insn (gen_lfloorxfdi2 (res, tmp1));
46107 break;
46108 default:
46109 gcc_unreachable ();
46112 /* flags = signbit(a) */
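  /* fxam sets C1 (bit 9 of the FPU status word, i.e. bit 0x02 of its
     high byte) to the sign of op1; test that bit.  */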
46113 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
46115 /* if (flags) then res = -res */
46116 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
46117 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
46118 gen_rtx_LABEL_REF (VOIDmode, jump_label),
46119 pc_rtx);
46120 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46121 predict_jump (REG_BR_PROB_BASE * 50 / 100);
46122 JUMP_LABEL (insn) = jump_label;
46124 emit_insn (gen_neg (res, res));
46126 emit_label (jump_label);
46127 LABEL_NUSES (jump_label) = 1;
46129 emit_move_insn (op0, res);
46132 /* Output code to perform a Newton-Raphson approximation of a single precision
46133 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
46135 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
46137 rtx x0, x1, e0, e1;
46139 x0 = gen_reg_rtx (mode);
46140 e0 = gen_reg_rtx (mode);
46141 e1 = gen_reg_rtx (mode);
46142 x1 = gen_reg_rtx (mode);
46144 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
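  /* This is one Newton-Raphson step x1 = x0 * (2 - b * x0) for 1/b,
     rewritten as (x0 + x0) - (b * x0 * x0) so it maps onto the two
     multiplies, one add and one subtract emitted below.  */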
46146 b = force_reg (mode, b);
46148 /* x0 = rcp(b) estimate */
46149 if (mode == V16SFmode || mode == V8DFmode)
46150 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46151 UNSPEC_RCP14)));
46152 else
46153 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46154 UNSPEC_RCP)));
46156 /* e0 = x0 * b */
46157 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
46159 /* e0 = x0 * e0 */
46160 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
46162 /* e1 = x0 + x0 */
46163 emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
46165 /* x1 = e1 - e0 */
46166 emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
46168 /* res = a * x1 */
46169 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
46172 /* Output code to perform a Newton-Raphson approximation of a
46173 single precision floating point [reciprocal] square root. */
46175 void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
46176 bool recip)
46178 rtx x0, e0, e1, e2, e3, mthree, mhalf;
46179 REAL_VALUE_TYPE r;
46180 int unspec;
46182 x0 = gen_reg_rtx (mode);
46183 e0 = gen_reg_rtx (mode);
46184 e1 = gen_reg_rtx (mode);
46185 e2 = gen_reg_rtx (mode);
46186 e3 = gen_reg_rtx (mode);
46188 real_from_integer (&r, VOIDmode, -3, SIGNED);
46189 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46191 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
46192 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46193 unspec = UNSPEC_RSQRT;
46195 if (VECTOR_MODE_P (mode))
46197 mthree = ix86_build_const_vector (mode, true, mthree);
46198 mhalf = ix86_build_const_vector (mode, true, mhalf);
46199 /* There is no 512-bit rsqrt. There is however rsqrt14. */
46200 if (GET_MODE_SIZE (mode) == 64)
46201 unspec = UNSPEC_RSQRT14;
46204 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
46205 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
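  /* Both forms are the Newton-Raphson step x1 = x0 * (1.5 - 0.5*a*x0^2)
     for 1/sqrt(a), i.e. -0.5 * x0 * (a*x0*x0 - 3.0); multiplying once
     more by a turns the refined reciprocal square root into sqrt(a).  */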
46207 a = force_reg (mode, a);
46209 /* x0 = rsqrt(a) estimate */
46210 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
46211 unspec)));
46213 /* If a == 0.0, mask out the infinite rsqrt estimate to prevent NaN for sqrt(0.0). */
46214 if (!recip)
46216 rtx zero, mask;
46218 zero = gen_reg_rtx (mode);
46219 mask = gen_reg_rtx (mode);
46221 zero = force_reg (mode, CONST0_RTX(mode));
46223 /* Handle masked compare. */
46224 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
46226 mask = gen_reg_rtx (HImode);
46227 /* Imm value 0x4 corresponds to not-equal comparison. */
46228 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
46229 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
46231 else
46233 emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a)));
46235 emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask)));
46239 /* e0 = x0 * a */
46240 emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
46241 /* e1 = e0 * x0 */
46242 emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0)));
46244 /* e2 = e1 - 3. */
46245 mthree = force_reg (mode, mthree);
46246 emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree)));
46248 mhalf = force_reg (mode, mhalf);
46249 if (recip)
46250 /* e3 = -.5 * x0 */
46251 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf)));
46252 else
46253 /* e3 = -.5 * e0 */
46254 emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf)));
46255 /* ret = e2 * e3 */
46256 emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3)));
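/* Illustrative sketch, not code emitted by this function: the constants
   -3.0 and -0.5 come from the Newton-Raphson step for 1/sqrt(a),
   x1 = 0.5 * x0 * (3 - a * x0 * x0), rewritten so the whole step is
   multiplies plus a single add.  In scalar C, with rsqrt () standing in
   for the RSQRTSS/RSQRT14 estimate:

     float
     swsqrt_sketch (float a, int recip)
     {
       float x0 = rsqrt (a);                      /* ~ 1/sqrt(a) */
       float e2 = a * x0 * x0 - 3.0f;             /* a*x0*x0 - 3 */
       float e3 = (recip ? x0 : a * x0) * -0.5f;  /* -.5*x0 or -.5*a*x0 */
       return e2 * e3;              /* refined 1/sqrt(a) or sqrt(a) */
     }
*/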
46259 #ifdef TARGET_SOLARIS
46260 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
46262 static void
46263 i386_solaris_elf_named_section (const char *name, unsigned int flags,
46264 tree decl)
46266 /* With Binutils 2.15, the "@unwind" marker must be specified on
46267 every occurrence of the ".eh_frame" section, not just the first
46268 one. */
46269 if (TARGET_64BIT
46270 && strcmp (name, ".eh_frame") == 0)
46272 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
46273 flags & SECTION_WRITE ? "aw" : "a");
46274 return;
46277 #ifndef USE_GAS
46278 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
46280 solaris_elf_asm_comdat_section (name, flags, decl);
46281 return;
46283 #endif
46285 default_elf_asm_named_section (name, flags, decl);
46287 #endif /* TARGET_SOLARIS */
46289 /* Return the mangling of TYPE if it is an extended fundamental type. */
46291 static const char *
46292 ix86_mangle_type (const_tree type)
46294 type = TYPE_MAIN_VARIANT (type);
46296 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
46297 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
46298 return NULL;
46300 switch (TYPE_MODE (type))
46302 case TFmode:
46303 /* __float128 is "g". */
46304 return "g";
46305 case XFmode:
46306 /* "long double" or __float80 is "e". */
46307 return "e";
46308 default:
46309 return NULL;
46313 /* For 32-bit code we can save the PIC register setup by using the
46314 hidden __stack_chk_fail_local function instead of calling
46315 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
46316 register, so it is better to call __stack_chk_fail directly. */
46318 static tree ATTRIBUTE_UNUSED
46319 ix86_stack_protect_fail (void)
46321 return TARGET_64BIT
46322 ? default_external_stack_protect_fail ()
46323 : default_hidden_stack_protect_fail ();
46326 /* Select a format to encode pointers in exception handling data. CODE
46327 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
46328 true if the symbol may be affected by dynamic relocations.
46330 ??? All x86 object file formats are capable of representing this.
46331 After all, the relocation needed is the same as for the call insn.
46332 Whether or not a particular assembler allows us to enter such, I
46333 guess we'll have to see. */
46335 int asm_preferred_eh_data_format (int code, int global)
46337 if (flag_pic)
46339 int type = DW_EH_PE_sdata8;
46340 if (!TARGET_64BIT
46341 || ix86_cmodel == CM_SMALL_PIC
46342 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46343 type = DW_EH_PE_sdata4;
46344 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46346 if (ix86_cmodel == CM_SMALL
46347 || (ix86_cmodel == CM_MEDIUM && code))
46348 return DW_EH_PE_udata4;
46349 return DW_EH_PE_absptr;
46352 /* Expand copysign from SIGN to the positive value ABS_VALUE
46353 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
46354 the sign-bit. */
46355 static void
46356 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46358 machine_mode mode = GET_MODE (sign);
46359 rtx sgn = gen_reg_rtx (mode);
46360 if (mask == NULL_RTX)
46362 machine_mode vmode;
46364 if (mode == SFmode)
46365 vmode = V4SFmode;
46366 else if (mode == DFmode)
46367 vmode = V2DFmode;
46368 else
46369 vmode = mode;
46371 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46372 if (!VECTOR_MODE_P (mode))
46374 /* We need to generate a scalar mode mask in this case. */
46375 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46376 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46377 mask = gen_reg_rtx (mode);
46378 emit_insn (gen_rtx_SET (mask, tmp));
46381 else
46382 mask = gen_rtx_NOT (mode, mask);
46383 emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign)));
46384 emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn)));
46387 /* Expand fabs (OP0) and return a new rtx that holds the result. The
46388 mask for masking out the sign-bit is stored in *SMASK, if that is
46389 non-null. */
46390 static rtx
46391 ix86_expand_sse_fabs (rtx op0, rtx *smask)
46393 machine_mode vmode, mode = GET_MODE (op0);
46394 rtx xa, mask;
46396 xa = gen_reg_rtx (mode);
46397 if (mode == SFmode)
46398 vmode = V4SFmode;
46399 else if (mode == DFmode)
46400 vmode = V2DFmode;
46401 else
46402 vmode = mode;
46403 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46404 if (!VECTOR_MODE_P (mode))
46406 /* We need to generate a scalar mode mask in this case. */
46407 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46408 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46409 mask = gen_reg_rtx (mode);
46410 emit_insn (gen_rtx_SET (mask, tmp));
46412 emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask)));
46414 if (smask)
46415 *smask = mask;
46417 return xa;
46420 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
46421 swapping the operands if SWAP_OPERANDS is true. The expanded
46422 code is a forward jump to a newly created label in case the
46423 comparison is true. The generated label rtx is returned. */
46424 static rtx_code_label *
46425 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46426 bool swap_operands)
46428 machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46429 rtx_code_label *label;
46430 rtx tmp;
46432 if (swap_operands)
46433 std::swap (op0, op1);
46435 label = gen_label_rtx ();
46436 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46437 emit_insn (gen_rtx_SET (tmp, gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46438 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46439 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46440 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46441 tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
46442 JUMP_LABEL (tmp) = label;
46444 return label;
46447 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
46448 using comparison code CODE. Operands are swapped for the comparison if
46449 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
46450 static rtx
46451 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46452 bool swap_operands)
46454 rtx (*insn)(rtx, rtx, rtx, rtx);
46455 machine_mode mode = GET_MODE (op0);
46456 rtx mask = gen_reg_rtx (mode);
46458 if (swap_operands)
46459 std::swap (op0, op1);
46461 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46463 emit_insn (insn (mask, op0, op1,
46464 gen_rtx_fmt_ee (code, mode, op0, op1)));
46465 return mask;
46468 /* Generate and return a rtx of mode MODE for 2**n where n is the number
46469 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
46470 static rtx
46471 ix86_gen_TWO52 (machine_mode mode)
46473 REAL_VALUE_TYPE TWO52r;
46474 rtx TWO52;
46476 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46477 TWO52 = const_double_from_real_value (TWO52r, mode);
46478 TWO52 = force_reg (mode, TWO52);
46480 return TWO52;
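/* Why the constant is 2**52 (2**23 for SFmode): for |x| < 2**52 the
   doubles in [2**52, 2**53) are spaced exactly 1.0 apart, so
   (x + TWO52) - TWO52 in the default round-to-nearest mode snaps x to
   the nearest integer without leaving the FP unit.  Illustrative
   values:

     2.4 + 0x1p52 - 0x1p52   /* == 2.0 */
     2.6 + 0x1p52 - 0x1p52   /* == 3.0 */
     2.5 + 0x1p52 - 0x1p52   /* == 2.0, ties round to even */

   The expanders below guard the trick with an isless (xa, TWO52) test,
   since values with |x| >= 2**52 are already integral.  */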
46483 /* Expand SSE sequence for computing lround from OP1 storing
46484 into OP0. */
46485 void
46486 ix86_expand_lround (rtx op0, rtx op1)
46488 /* C code for the stuff we're doing below:
46489 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46490 return (long)tmp;
46492 machine_mode mode = GET_MODE (op1);
46493 const struct real_format *fmt;
46494 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46495 rtx adj;
46497 /* load nextafter (0.5, 0.0) */
46498 fmt = REAL_MODE_FORMAT (mode);
46499 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46500 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46502 /* adj = copysign (0.5, op1) */
46503 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46504 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46506 /* adj = op1 + adj */
46507 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46509 /* op0 = (imode)adj */
46510 expand_fix (op0, adj, 0);
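/* Illustrative note on the constant: pred_half is
   nextafter (0.5, 0.0) == 0.5 - 2**(-p-1), the largest value strictly
   below 0.5.  Adding plain 0.5 would round the predecessor of 0.5 the
   wrong way, e.g. for DFmode:

     double x = 0.49999999999999994;    /* nextafter (0.5, 0.0) */
     (long) (x + 0.5)                   /* == 1, the sum rounds up to 1.0 */
     (long) (x + 0.49999999999999994)   /* == 0, the sum stays below 1.0 */
*/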
46513 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
46514 storing into OPERAND0. */
46515 void
46516 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46518 /* C code for the stuff we're doing below (for do_floor):
46519 xi = (long)op1;
46520 xi -= (double)xi > op1 ? 1 : 0;
46521 return xi;
46523 machine_mode fmode = GET_MODE (op1);
46524 machine_mode imode = GET_MODE (op0);
46525 rtx ireg, freg, tmp;
46526 rtx_code_label *label;
46528 /* reg = (long)op1 */
46529 ireg = gen_reg_rtx (imode);
46530 expand_fix (ireg, op1, 0);
46532 /* freg = (double)reg */
46533 freg = gen_reg_rtx (fmode);
46534 expand_float (freg, ireg, 0);
46536 /* ireg = (freg > op1) ? ireg - 1 : ireg */
46537 label = ix86_expand_sse_compare_and_jump (UNLE,
46538 freg, op1, !do_floor);
46539 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46540 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46541 emit_move_insn (ireg, tmp);
46543 emit_label (label);
46544 LABEL_NUSES (label) = 1;
46546 emit_move_insn (op0, ireg);
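/* Illustrative scalar equivalent of the sequence above, not the emitted
   code (which jumps over the adjustment on the UNLE comparison rather
   than materializing a 0/1 value):

     long
     lfloorceil_sketch (double x, int do_floor)
     {
       long xi = (long) x;          /* truncates toward zero */
       double fx = (double) xi;
       if (do_floor)
         xi -= fx > x;              /* x was negative and non-integral */
       else
         xi += fx < x;              /* x was positive and non-integral */
       return xi;
     }
*/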
46549 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46550 result in OPERAND0. */
46551 void
46552 ix86_expand_rint (rtx operand0, rtx operand1)
46554 /* C code for the stuff we're doing below:
46555 xa = fabs (operand1);
46556 if (!isless (xa, 2**52))
46557 return operand1;
46558 xa = xa + 2**52 - 2**52;
46559 return copysign (xa, operand1);
46561 machine_mode mode = GET_MODE (operand0);
46562 rtx res, xa, TWO52, mask;
46563 rtx_code_label *label;
46565 res = gen_reg_rtx (mode);
46566 emit_move_insn (res, operand1);
46568 /* xa = abs (operand1) */
46569 xa = ix86_expand_sse_fabs (res, &mask);
46571 /* if (!isless (xa, TWO52)) goto label; */
46572 TWO52 = ix86_gen_TWO52 (mode);
46573 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46575 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46576 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46578 ix86_sse_copysign_to_positive (res, xa, res, mask);
46580 emit_label (label);
46581 LABEL_NUSES (label) = 1;
46583 emit_move_insn (operand0, res);
46586 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46587 into OPERAND0. */
46588 void
46589 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46591 /* C code for the stuff we expand below.
46592 double xa = fabs (x), x2;
46593 if (!isless (xa, TWO52))
46594 return x;
46595 xa = xa + TWO52 - TWO52;
46596 x2 = copysign (xa, x);
46597 Compensate. Floor:
46598 if (x2 > x)
46599 x2 -= 1;
46600 Compensate. Ceil:
46601 if (x2 < x)
46602 x2 -= -1;
46603 return x2;
46605 machine_mode mode = GET_MODE (operand0);
46606 rtx xa, TWO52, tmp, one, res, mask;
46607 rtx_code_label *label;
46609 TWO52 = ix86_gen_TWO52 (mode);
46611 /* Temporary for holding the result, initialized to the input
46612 operand to ease control flow. */
46613 res = gen_reg_rtx (mode);
46614 emit_move_insn (res, operand1);
46616 /* xa = abs (operand1) */
46617 xa = ix86_expand_sse_fabs (res, &mask);
46619 /* if (!isless (xa, TWO52)) goto label; */
46620 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46622 /* xa = xa + TWO52 - TWO52; */
46623 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46624 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46626 /* xa = copysign (xa, operand1) */
46627 ix86_sse_copysign_to_positive (xa, xa, res, mask);
46629 /* generate 1.0 or -1.0 */
46630 one = force_reg (mode,
46631 const_double_from_real_value (do_floor
46632 ? dconst1 : dconstm1, mode));
46634 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46635 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46636 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46637 /* We always need to subtract here to preserve signed zero. */
46638 tmp = expand_simple_binop (mode, MINUS,
46639 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46640 emit_move_insn (res, tmp);
46642 emit_label (label);
46643 LABEL_NUSES (label) = 1;
46645 emit_move_insn (operand0, res);
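/* Illustrative note on the compensation above, not extra emitted code:
   the UNGT compare yields an all-ones or all-zeros bit pattern per
   element, so ANDing it with the 1.0 (or -1.0 for ceil) constant
   produces exactly the adjustment operand:

     mask = x2 > x ? ~0 : 0;    /* cmppd/cmpps-style result        */
     adj  = mask & one;         /* bitwise: 1.0 (or -1.0) or +0.0  */
     x2   = x2 - adj;           /* x2 - +0.0 keeps -0.0 intact     */

   Subtracting +0.0 (rather than adding) is what preserves a -0.0
   result, as noted above.  */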
46648 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46649 into OPERAND0. */
46650 void
46651 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46653 /* C code for the stuff we expand below.
46654 double xa = fabs (x), x2;
46655 if (!isless (xa, TWO52))
46656 return x;
46657 x2 = (double)(long)x;
46658 Compensate. Floor:
46659 if (x2 > x)
46660 x2 -= 1;
46661 Compensate. Ceil:
46662 if (x2 < x)
46663 x2 += 1;
46664 if (HONOR_SIGNED_ZEROS (mode))
46665 return copysign (x2, x);
46666 return x2;
46668 machine_mode mode = GET_MODE (operand0);
46669 rtx xa, xi, TWO52, tmp, one, res, mask;
46670 rtx_code_label *label;
46672 TWO52 = ix86_gen_TWO52 (mode);
46674 /* Temporary for holding the result, initialized to the input
46675 operand to ease control flow. */
46676 res = gen_reg_rtx (mode);
46677 emit_move_insn (res, operand1);
46679 /* xa = abs (operand1) */
46680 xa = ix86_expand_sse_fabs (res, &mask);
46682 /* if (!isless (xa, TWO52)) goto label; */
46683 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46685 /* xa = (double)(long)x */
46686 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46687 expand_fix (xi, res, 0);
46688 expand_float (xa, xi, 0);
46690 /* generate 1.0 */
46691 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46693 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46694 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46695 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46696 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46697 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46698 emit_move_insn (res, tmp);
46700 if (HONOR_SIGNED_ZEROS (mode))
46701 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46703 emit_label (label);
46704 LABEL_NUSES (label) = 1;
46706 emit_move_insn (operand0, res);
46709 /* Expand SSE sequence for computing round from OPERAND1 storing
46710 into OPERAND0. This sequence works without relying on DImode truncation
46711 via cvttsd2siq, which is only available on 64-bit targets. */
46712 void
46713 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46715 /* C code for the stuff we expand below.
46716 double xa = fabs (x), xa2, x2;
46717 if (!isless (xa, TWO52))
46718 return x;
46719 Using the absolute value and copying back sign makes
46720 -0.0 -> -0.0 correct.
46721 xa2 = xa + TWO52 - TWO52;
46722 Compensate.
46723 dxa = xa2 - xa;
46724 if (dxa <= -0.5)
46725 xa2 += 1;
46726 else if (dxa > 0.5)
46727 xa2 -= 1;
46728 x2 = copysign (xa2, x);
46729 return x2;
46731 machine_mode mode = GET_MODE (operand0);
46732 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46733 rtx_code_label *label;
46735 TWO52 = ix86_gen_TWO52 (mode);
46737 /* Temporary for holding the result, initialized to the input
46738 operand to ease control flow. */
46739 res = gen_reg_rtx (mode);
46740 emit_move_insn (res, operand1);
46742 /* xa = abs (operand1) */
46743 xa = ix86_expand_sse_fabs (res, &mask);
46745 /* if (!isless (xa, TWO52)) goto label; */
46746 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46748 /* xa2 = xa + TWO52 - TWO52; */
46749 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46750 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46752 /* dxa = xa2 - xa; */
46753 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46755 /* generate 0.5, 1.0 and -0.5 */
46756 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46757 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46758 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46759 0, OPTAB_DIRECT);
46761 /* Compensate. */
46762 tmp = gen_reg_rtx (mode);
46763 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46764 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46765 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46766 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46767 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46768 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46769 emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
46770 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46772 /* res = copysign (xa2, operand1) */
46773 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46775 emit_label (label);
46776 LABEL_NUSES (label) = 1;
46778 emit_move_insn (operand0, res);
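/* Worked example of the sequence above (illustrative): for x = 2.5,
     xa  = 2.5
     xa2 = (2.5 + TWO52) - TWO52 = 2.0   (the tie rounds to even)
     dxa = xa2 - xa = -0.5
   dxa > 0.5 is false, so nothing is subtracted; dxa <= -0.5 is true,
   so 1.0 is added and xa2 = 3.0; res = copysign (3.0, 2.5) = 3.0.
   The compensation is what turns the FPU's round-to-even behaviour into
   the round-half-away-from-zero semantics of round ().  */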
46781 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46782 into OPERAND0. */
46783 void
46784 ix86_expand_trunc (rtx operand0, rtx operand1)
46786 /* C code for SSE variant we expand below.
46787 double xa = fabs (x), x2;
46788 if (!isless (xa, TWO52))
46789 return x;
46790 x2 = (double)(long)x;
46791 if (HONOR_SIGNED_ZEROS (mode))
46792 return copysign (x2, x);
46793 return x2;
46795 machine_mode mode = GET_MODE (operand0);
46796 rtx xa, xi, TWO52, res, mask;
46797 rtx_code_label *label;
46799 TWO52 = ix86_gen_TWO52 (mode);
46801 /* Temporary for holding the result, initialized to the input
46802 operand to ease control flow. */
46803 res = gen_reg_rtx (mode);
46804 emit_move_insn (res, operand1);
46806 /* xa = abs (operand1) */
46807 xa = ix86_expand_sse_fabs (res, &mask);
46809 /* if (!isless (xa, TWO52)) goto label; */
46810 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46812 /* x = (double)(long)x */
46813 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46814 expand_fix (xi, res, 0);
46815 expand_float (res, xi, 0);
46817 if (HONOR_SIGNED_ZEROS (mode))
46818 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46820 emit_label (label);
46821 LABEL_NUSES (label) = 1;
46823 emit_move_insn (operand0, res);
46826 /* Expand SSE sequence for computing trunc from OPERAND1 storing
46827 into OPERAND0. */
46828 void
46829 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46831 machine_mode mode = GET_MODE (operand0);
46832 rtx xa, mask, TWO52, one, res, smask, tmp;
46833 rtx_code_label *label;
46835 /* C code for SSE variant we expand below.
46836 double xa = fabs (x), x2;
46837 if (!isless (xa, TWO52))
46838 return x;
46839 xa2 = xa + TWO52 - TWO52;
46840 Compensate:
46841 if (xa2 > xa)
46842 xa2 -= 1.0;
46843 x2 = copysign (xa2, x);
46844 return x2;
46847 TWO52 = ix86_gen_TWO52 (mode);
46849 /* Temporary for holding the result, initialized to the input
46850 operand to ease control flow. */
46851 res = gen_reg_rtx (mode);
46852 emit_move_insn (res, operand1);
46854 /* xa = abs (operand1) */
46855 xa = ix86_expand_sse_fabs (res, &smask);
46857 /* if (!isless (xa, TWO52)) goto label; */
46858 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46860 /* res = xa + TWO52 - TWO52; */
46861 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46862 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46863 emit_move_insn (res, tmp);
46865 /* generate 1.0 */
46866 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46868 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
46869 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46870 emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one)));
46871 tmp = expand_simple_binop (mode, MINUS,
46872 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46873 emit_move_insn (res, tmp);
46875 /* res = copysign (res, operand1) */
46876 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46878 emit_label (label);
46879 LABEL_NUSES (label) = 1;
46881 emit_move_insn (operand0, res);
46884 /* Expand SSE sequence for computing round from OPERAND1 storing
46885 into OPERAND0. */
46886 void
46887 ix86_expand_round (rtx operand0, rtx operand1)
46889 /* C code for the stuff we're doing below:
46890 double xa = fabs (x);
46891 if (!isless (xa, TWO52))
46892 return x;
46893 xa = (double)(long)(xa + nextafter (0.5, 0.0));
46894 return copysign (xa, x);
46896 machine_mode mode = GET_MODE (operand0);
46897 rtx res, TWO52, xa, xi, half, mask;
46898 rtx_code_label *label;
46899 const struct real_format *fmt;
46900 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46902 /* Temporary for holding the result, initialized to the input
46903 operand to ease control flow. */
46904 res = gen_reg_rtx (mode);
46905 emit_move_insn (res, operand1);
46907 TWO52 = ix86_gen_TWO52 (mode);
46908 xa = ix86_expand_sse_fabs (res, &mask);
46909 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46911 /* load nextafter (0.5, 0.0) */
46912 fmt = REAL_MODE_FORMAT (mode);
46913 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46914 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46916 /* xa = xa + 0.5 */
46917 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46918 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46920 /* xa = (double)(int64_t)xa */
46921 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46922 expand_fix (xi, xa, 0);
46923 expand_float (xa, xi, 0);
46925 /* res = copysign (xa, operand1) */
46926 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46928 emit_label (label);
46929 LABEL_NUSES (label) = 1;
46931 emit_move_insn (operand0, res);
46934 /* Expand SSE sequence for computing round
46935 from OP1 storing into OP0 using sse4 round insn. */
46936 void
46937 ix86_expand_round_sse4 (rtx op0, rtx op1)
46939 machine_mode mode = GET_MODE (op0);
46940 rtx e1, e2, res, half;
46941 const struct real_format *fmt;
46942 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46943 rtx (*gen_copysign) (rtx, rtx, rtx);
46944 rtx (*gen_round) (rtx, rtx, rtx);
46946 switch (mode)
46948 case SFmode:
46949 gen_copysign = gen_copysignsf3;
46950 gen_round = gen_sse4_1_roundsf2;
46951 break;
46952 case DFmode:
46953 gen_copysign = gen_copysigndf3;
46954 gen_round = gen_sse4_1_rounddf2;
46955 break;
46956 default:
46957 gcc_unreachable ();
46960 /* round (a) = trunc (a + copysign (0.5, a)) */
46962 /* load nextafter (0.5, 0.0) */
46963 fmt = REAL_MODE_FORMAT (mode);
46964 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46965 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46966 half = const_double_from_real_value (pred_half, mode);
46968 /* e1 = copysign (0.5, op1) */
46969 e1 = gen_reg_rtx (mode);
46970 emit_insn (gen_copysign (e1, half, op1));
46972 /* e2 = op1 + e1 */
46973 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46975 /* res = trunc (e2) */
46976 res = gen_reg_rtx (mode);
46977 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46979 emit_move_insn (op0, res);
46983 /* Table of valid machine attributes. */
46984 static const struct attribute_spec ix86_attribute_table[] =
46986 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46987 affects_type_identity } */
46988 /* Stdcall attribute says callee is responsible for popping arguments
46989 if they are not variable. */
46990 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46991 true },
46992 /* Fastcall attribute says callee is responsible for popping arguments
46993 if they are not variable. */
46994 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46995 true },
46996 /* Thiscall attribute says callee is responsible for popping arguments
46997 if they are not variable. */
46998 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46999 true },
47000 /* Cdecl attribute says the callee is a normal C declaration */
47001 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
47002 true },
47003 /* Regparm attribute specifies how many integer arguments are to be
47004 passed in registers. */
47005 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
47006 true },
47007 /* Sseregparm attribute says we are using x86_64 calling conventions
47008 for FP arguments. */
47009 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
47010 true },
47011 /* The transactional memory builtins are implicitly regparm or fastcall
47012 depending on the ABI. Override the generic do-nothing attribute that
47013 these builtins were declared with. */
47014 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
47015 true },
47016 /* force_align_arg_pointer says this function realigns the stack at entry. */
47017 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
47018 false, true, true, ix86_handle_cconv_attribute, false },
47019 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47020 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
47021 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
47022 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
47023 false },
47024 #endif
47025 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
47026 false },
47027 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
47028 false },
47029 #ifdef SUBTARGET_ATTRIBUTE_TABLE
47030 SUBTARGET_ATTRIBUTE_TABLE,
47031 #endif
47032 /* ms_abi and sysv_abi calling convention function attributes. */
47033 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
47034 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
47035 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
47036 false },
47037 { "callee_pop_aggregate_return", 1, 1, false, true, true,
47038 ix86_handle_callee_pop_aggregate_return, true },
47039 /* End element. */
47040 { NULL, 0, 0, false, false, false, NULL, false }
47043 /* Implement targetm.vectorize.builtin_vectorization_cost. */
47044 static int
47045 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
47046 tree vectype, int)
47048 unsigned elements;
47050 switch (type_of_cost)
47052 case scalar_stmt:
47053 return ix86_cost->scalar_stmt_cost;
47055 case scalar_load:
47056 return ix86_cost->scalar_load_cost;
47058 case scalar_store:
47059 return ix86_cost->scalar_store_cost;
47061 case vector_stmt:
47062 return ix86_cost->vec_stmt_cost;
47064 case vector_load:
47065 return ix86_cost->vec_align_load_cost;
47067 case vector_store:
47068 return ix86_cost->vec_store_cost;
47070 case vec_to_scalar:
47071 return ix86_cost->vec_to_scalar_cost;
47073 case scalar_to_vec:
47074 return ix86_cost->scalar_to_vec_cost;
47076 case unaligned_load:
47077 case unaligned_store:
47078 return ix86_cost->vec_unalign_load_cost;
47080 case cond_branch_taken:
47081 return ix86_cost->cond_taken_branch_cost;
47083 case cond_branch_not_taken:
47084 return ix86_cost->cond_not_taken_branch_cost;
47086 case vec_perm:
47087 case vec_promote_demote:
47088 return ix86_cost->vec_stmt_cost;
47090 case vec_construct:
47091 elements = TYPE_VECTOR_SUBPARTS (vectype);
47092 return ix86_cost->vec_stmt_cost * (elements / 2 + 1);
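/* For instance, a V4SI construction (4 elements) is costed as
   vec_stmt_cost * 3 and a V16QI construction (16 elements) as
   vec_stmt_cost * 9, a rough linear model of the element inserts and
   interleaves needed to build the vector.  */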
47094 default:
47095 gcc_unreachable ();
47099 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
47100 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
47101 insn every time. */
47103 static GTY(()) rtx_insn *vselect_insn;
47105 /* Initialize vselect_insn. */
47107 static void
47108 init_vselect_insn (void)
47110 unsigned i;
47111 rtx x;
47113 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
47114 for (i = 0; i < MAX_VECT_LEN; ++i)
47115 XVECEXP (x, 0, i) = const0_rtx;
47116 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
47117 const0_rtx), x);
47118 x = gen_rtx_SET (const0_rtx, x);
47119 start_sequence ();
47120 vselect_insn = emit_insn (x);
47121 end_sequence ();
47124 /* Construct (set target (vec_select op0 (parallel perm))) and
47125 return true if that's a valid instruction in the active ISA. */
47127 static bool
47128 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
47129 unsigned nelt, bool testing_p)
47131 unsigned int i;
47132 rtx x, save_vconcat;
47133 int icode;
47135 if (vselect_insn == NULL_RTX)
47136 init_vselect_insn ();
47138 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
47139 PUT_NUM_ELEM (XVEC (x, 0), nelt);
47140 for (i = 0; i < nelt; ++i)
47141 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
47142 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47143 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
47144 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
47145 SET_DEST (PATTERN (vselect_insn)) = target;
47146 icode = recog_memoized (vselect_insn);
47148 if (icode >= 0 && !testing_p)
47149 emit_insn (copy_rtx (PATTERN (vselect_insn)));
47151 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
47152 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
47153 INSN_CODE (vselect_insn) = -1;
47155 return icode >= 0;
47158 /* Similar, but generate a vec_concat from op0 and op1 as well. */
47160 static bool
47161 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
47162 const unsigned char *perm, unsigned nelt,
47163 bool testing_p)
47165 machine_mode v2mode;
47166 rtx x;
47167 bool ok;
47169 if (vselect_insn == NULL_RTX)
47170 init_vselect_insn ();
47172 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
47173 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47174 PUT_MODE (x, v2mode);
47175 XEXP (x, 0) = op0;
47176 XEXP (x, 1) = op1;
47177 ok = expand_vselect (target, x, perm, nelt, testing_p);
47178 XEXP (x, 0) = const0_rtx;
47179 XEXP (x, 1) = const0_rtx;
47180 return ok;
47183 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47184 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
47186 static bool
47187 expand_vec_perm_blend (struct expand_vec_perm_d *d)
47189 machine_mode mmode, vmode = d->vmode;
47190 unsigned i, mask, nelt = d->nelt;
47191 rtx target, op0, op1, maskop, x;
47192 rtx rperm[32], vperm;
47194 if (d->one_operand_p)
47195 return false;
47196 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
47197 && (TARGET_AVX512BW
47198 || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4))
47200 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47202 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47204 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47206 else
47207 return false;
47209 /* This is a blend, not a permute. Elements must stay in their
47210 respective lanes. */
47211 for (i = 0; i < nelt; ++i)
47213 unsigned e = d->perm[i];
47214 if (!(e == i || e == i + nelt))
47215 return false;
47218 if (d->testing_p)
47219 return true;
47221 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
47222 decision should be extracted elsewhere, so that we only try that
47223 sequence once all budget==3 options have been tried. */
47224 target = d->target;
47225 op0 = d->op0;
47226 op1 = d->op1;
47227 mask = 0;
47229 switch (vmode)
47231 case V8DFmode:
47232 case V16SFmode:
47233 case V4DFmode:
47234 case V8SFmode:
47235 case V2DFmode:
47236 case V4SFmode:
47237 case V8HImode:
47238 case V8SImode:
47239 case V32HImode:
47240 case V64QImode:
47241 case V16SImode:
47242 case V8DImode:
47243 for (i = 0; i < nelt; ++i)
47244 mask |= (d->perm[i] >= nelt) << i;
47245 break;
47247 case V2DImode:
47248 for (i = 0; i < 2; ++i)
47249 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
47250 vmode = V8HImode;
47251 goto do_subreg;
47253 case V4SImode:
47254 for (i = 0; i < 4; ++i)
47255 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47256 vmode = V8HImode;
47257 goto do_subreg;
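/* Illustrative example: a V4SI blend such as { 0, 5, 2, 7 } (take op1
   in lanes 1 and 3) is performed as a V8HI pblendw, so each SImode
   selector bit is widened to two mask bits, giving the immediate
   0xcc == 0b11001100; a V2DI selector is likewise widened to four
   bits (0xf) per lane.  */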
47259 case V16QImode:
47260 /* See if bytes move in pairs so we can use pblendw with
47261 an immediate argument, rather than pblendvb with a vector
47262 argument. */
47263 for (i = 0; i < 16; i += 2)
47264 if (d->perm[i] + 1 != d->perm[i + 1])
47266 use_pblendvb:
47267 for (i = 0; i < nelt; ++i)
47268 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
47270 finish_pblendvb:
47271 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
47272 vperm = force_reg (vmode, vperm);
47274 if (GET_MODE_SIZE (vmode) == 16)
47275 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
47276 else
47277 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
47278 if (target != d->target)
47279 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47280 return true;
47283 for (i = 0; i < 8; ++i)
47284 mask |= (d->perm[i * 2] >= 16) << i;
47285 vmode = V8HImode;
47286 /* FALLTHRU */
47288 do_subreg:
47289 target = gen_reg_rtx (vmode);
47290 op0 = gen_lowpart (vmode, op0);
47291 op1 = gen_lowpart (vmode, op1);
47292 break;
47294 case V32QImode:
47295 /* See if bytes move in pairs. If not, vpblendvb must be used. */
47296 for (i = 0; i < 32; i += 2)
47297 if (d->perm[i] + 1 != d->perm[i + 1])
47298 goto use_pblendvb;
47299 /* See if bytes move in quadruplets. If yes, vpblendd
47300 with immediate can be used. */
47301 for (i = 0; i < 32; i += 4)
47302 if (d->perm[i] + 2 != d->perm[i + 2])
47303 break;
47304 if (i < 32)
47306 /* See if bytes move the same in both lanes. If yes,
47307 vpblendw with immediate can be used. */
47308 for (i = 0; i < 16; i += 2)
47309 if (d->perm[i] + 16 != d->perm[i + 16])
47310 goto use_pblendvb;
47312 /* Use vpblendw. */
47313 for (i = 0; i < 16; ++i)
47314 mask |= (d->perm[i * 2] >= 32) << i;
47315 vmode = V16HImode;
47316 goto do_subreg;
47319 /* Use vpblendd. */
47320 for (i = 0; i < 8; ++i)
47321 mask |= (d->perm[i * 4] >= 32) << i;
47322 vmode = V8SImode;
47323 goto do_subreg;
47325 case V16HImode:
47326 /* See if words move in pairs. If yes, vpblendd can be used. */
47327 for (i = 0; i < 16; i += 2)
47328 if (d->perm[i] + 1 != d->perm[i + 1])
47329 break;
47330 if (i < 16)
47332 /* See if words move the same in both lanes. If not,
47333 vpblendvb must be used. */
47334 for (i = 0; i < 8; i++)
47335 if (d->perm[i] + 8 != d->perm[i + 8])
47337 /* Use vpblendvb. */
47338 for (i = 0; i < 32; ++i)
47339 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47341 vmode = V32QImode;
47342 nelt = 32;
47343 target = gen_reg_rtx (vmode);
47344 op0 = gen_lowpart (vmode, op0);
47345 op1 = gen_lowpart (vmode, op1);
47346 goto finish_pblendvb;
47349 /* Use vpblendw. */
47350 for (i = 0; i < 16; ++i)
47351 mask |= (d->perm[i] >= 16) << i;
47352 break;
47355 /* Use vpblendd. */
47356 for (i = 0; i < 8; ++i)
47357 mask |= (d->perm[i * 2] >= 16) << i;
47358 vmode = V8SImode;
47359 goto do_subreg;
47361 case V4DImode:
47362 /* Use vpblendd. */
47363 for (i = 0; i < 4; ++i)
47364 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47365 vmode = V8SImode;
47366 goto do_subreg;
47368 default:
47369 gcc_unreachable ();
47372 switch (vmode)
47374 case V8DFmode:
47375 case V8DImode:
47376 mmode = QImode;
47377 break;
47378 case V16SFmode:
47379 case V16SImode:
47380 mmode = HImode;
47381 break;
47382 case V32HImode:
47383 mmode = SImode;
47384 break;
47385 case V64QImode:
47386 mmode = DImode;
47387 break;
47388 default:
47389 mmode = VOIDmode;
47392 if (mmode != VOIDmode)
47393 maskop = force_reg (mmode, gen_int_mode (mask, mmode));
47394 else
47395 maskop = GEN_INT (mask);
47397 /* This matches five different patterns with the different modes. */
47398 x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
47399 x = gen_rtx_SET (target, x);
47400 emit_insn (x);
47401 if (target != d->target)
47402 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47404 return true;
47407 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47408 in terms of the variable form of vpermilps.
47410 Note that we will have already failed the immediate input vpermilps,
47411 which requires that the high and low part shuffle be identical; the
47412 variable form doesn't require that. */
47414 static bool
47415 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47417 rtx rperm[8], vperm;
47418 unsigned i;
47420 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47421 return false;
47423 /* We can only permute within the 128-bit lane. */
47424 for (i = 0; i < 8; ++i)
47426 unsigned e = d->perm[i];
47427 if (i < 4 ? e >= 4 : e < 4)
47428 return false;
47431 if (d->testing_p)
47432 return true;
47434 for (i = 0; i < 8; ++i)
47436 unsigned e = d->perm[i];
47438 /* Within each 128-bit lane, the elements of op0 are numbered
47439 from 0 and the elements of op1 are numbered from 4. */
47440 if (e >= 8 + 4)
47441 e -= 8;
47442 else if (e >= 4)
47443 e -= 4;
47445 rperm[i] = GEN_INT (e);
47448 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47449 vperm = force_reg (V8SImode, vperm);
47450 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47452 return true;
47455 /* Return true if permutation D can be performed as VMODE permutation
47456 instead. */
47458 static bool
47459 valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47461 unsigned int i, j, chunk;
47463 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47464 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47465 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47466 return false;
47468 if (GET_MODE_NUNITS (vmode) >= d->nelt)
47469 return true;
47471 chunk = d->nelt / GET_MODE_NUNITS (vmode);
47472 for (i = 0; i < d->nelt; i += chunk)
47473 if (d->perm[i] & (chunk - 1))
47474 return false;
47475 else
47476 for (j = 1; j < chunk; ++j)
47477 if (d->perm[i] + j != d->perm[i + j])
47478 return false;
47480 return true;
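/* Illustrative example: the V16QI permutation
   { 4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11 } moves aligned groups of
   four bytes, so it passes the chunk checks above and is also a valid
   V4SI permutation, namely { 1 0 3 2 }, which can then be expanded in
   the wider mode.  */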
47483 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47484 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
47486 static bool
47487 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47489 unsigned i, nelt, eltsz, mask;
47490 unsigned char perm[64];
47491 machine_mode vmode = V16QImode;
47492 rtx rperm[64], vperm, target, op0, op1;
47494 nelt = d->nelt;
47496 if (!d->one_operand_p)
47498 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47500 if (TARGET_AVX2
47501 && valid_perm_using_mode_p (V2TImode, d))
47503 if (d->testing_p)
47504 return true;
47506 /* Use vperm2i128 insn. The pattern uses
47507 V4DImode instead of V2TImode. */
47508 target = d->target;
47509 if (d->vmode != V4DImode)
47510 target = gen_reg_rtx (V4DImode);
47511 op0 = gen_lowpart (V4DImode, d->op0);
47512 op1 = gen_lowpart (V4DImode, d->op1);
47513 rperm[0]
47514 = GEN_INT ((d->perm[0] / (nelt / 2))
47515 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47516 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47517 if (target != d->target)
47518 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47519 return true;
47521 return false;
47524 else
47526 if (GET_MODE_SIZE (d->vmode) == 16)
47528 if (!TARGET_SSSE3)
47529 return false;
47531 else if (GET_MODE_SIZE (d->vmode) == 32)
47533 if (!TARGET_AVX2)
47534 return false;
47536 /* V4DImode should already be handled through
47537 expand_vselect by the vpermq instruction. */
47538 gcc_assert (d->vmode != V4DImode);
47540 vmode = V32QImode;
47541 if (d->vmode == V8SImode
47542 || d->vmode == V16HImode
47543 || d->vmode == V32QImode)
47545 /* First see if vpermq can be used for
47546 V8SImode/V16HImode/V32QImode. */
47547 if (valid_perm_using_mode_p (V4DImode, d))
47549 for (i = 0; i < 4; i++)
47550 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47551 if (d->testing_p)
47552 return true;
47553 target = gen_reg_rtx (V4DImode);
47554 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47555 perm, 4, false))
47557 emit_move_insn (d->target,
47558 gen_lowpart (d->vmode, target));
47559 return true;
47561 return false;
47564 /* Next see if vpermd can be used. */
47565 if (valid_perm_using_mode_p (V8SImode, d))
47566 vmode = V8SImode;
47568 /* Or if vpermps can be used. */
47569 else if (d->vmode == V8SFmode)
47570 vmode = V8SImode;
47572 if (vmode == V32QImode)
47574 /* vpshufb only works intra lanes; it is not
47575 possible to shuffle bytes in between the lanes. */
47576 for (i = 0; i < nelt; ++i)
47577 if ((d->perm[i] ^ i) & (nelt / 2))
47578 return false;
47581 else if (GET_MODE_SIZE (d->vmode) == 64)
47583 if (!TARGET_AVX512BW)
47584 return false;
47586 /* If vpermq didn't work, vpshufb won't work either. */
47587 if (d->vmode == V8DFmode || d->vmode == V8DImode)
47588 return false;
47590 vmode = V64QImode;
47591 if (d->vmode == V16SImode
47592 || d->vmode == V32HImode
47593 || d->vmode == V64QImode)
47595 /* First see if vpermq can be used for
47596 V16SImode/V32HImode/V64QImode. */
47597 if (valid_perm_using_mode_p (V8DImode, d))
47599 for (i = 0; i < 8; i++)
47600 perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47601 if (d->testing_p)
47602 return true;
47603 target = gen_reg_rtx (V8DImode);
47604 if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47605 perm, 8, false))
47607 emit_move_insn (d->target,
47608 gen_lowpart (d->vmode, target));
47609 return true;
47611 return false;
47614 /* Next see if vpermd can be used. */
47615 if (valid_perm_using_mode_p (V16SImode, d))
47616 vmode = V16SImode;
47618 /* Or if vpermps can be used. */
47619 else if (d->vmode == V16SFmode)
47620 vmode = V16SImode;
47621 if (vmode == V64QImode)
47623 /* vpshufb only works intra lanes; it is not
47624 possible to shuffle bytes in between the lanes. */
47625 for (i = 0; i < nelt; ++i)
47626 if ((d->perm[i] ^ i) & (nelt / 4))
47627 return false;
47630 else
47631 return false;
47634 if (d->testing_p)
47635 return true;
47637 if (vmode == V8SImode)
47638 for (i = 0; i < 8; ++i)
47639 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47640 else if (vmode == V16SImode)
47641 for (i = 0; i < 16; ++i)
47642 rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47643 else
47645 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47646 if (!d->one_operand_p)
47647 mask = 2 * nelt - 1;
47648 else if (vmode == V16QImode)
47649 mask = nelt - 1;
47650 else if (vmode == V64QImode)
47651 mask = nelt / 4 - 1;
47652 else
47653 mask = nelt / 2 - 1;
47655 for (i = 0; i < nelt; ++i)
47657 unsigned j, e = d->perm[i] & mask;
47658 for (j = 0; j < eltsz; ++j)
47659 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47663 vperm = gen_rtx_CONST_VECTOR (vmode,
47664 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47665 vperm = force_reg (vmode, vperm);
47667 target = d->target;
47668 if (d->vmode != vmode)
47669 target = gen_reg_rtx (vmode);
47670 op0 = gen_lowpart (vmode, d->op0);
47671 if (d->one_operand_p)
47673 if (vmode == V16QImode)
47674 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47675 else if (vmode == V32QImode)
47676 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47677 else if (vmode == V64QImode)
47678 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47679 else if (vmode == V8SFmode)
47680 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47681 else if (vmode == V8SImode)
47682 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47683 else if (vmode == V16SFmode)
47684 emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47685 else if (vmode == V16SImode)
47686 emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47687 else
47688 gcc_unreachable ();
47690 else
47692 op1 = gen_lowpart (vmode, d->op1);
47693 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47695 if (target != d->target)
47696 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47698 return true;
47701 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
47702 in a single instruction. */
47704 static bool
47705 expand_vec_perm_1 (struct expand_vec_perm_d *d)
47707 unsigned i, nelt = d->nelt;
47708 unsigned char perm2[MAX_VECT_LEN];
47710 /* Check plain VEC_SELECT first, because AVX has instructions that could
47711 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47712 input where SEL+CONCAT may not. */
47713 if (d->one_operand_p)
47715 int mask = nelt - 1;
47716 bool identity_perm = true;
47717 bool broadcast_perm = true;
47719 for (i = 0; i < nelt; i++)
47721 perm2[i] = d->perm[i] & mask;
47722 if (perm2[i] != i)
47723 identity_perm = false;
47724 if (perm2[i])
47725 broadcast_perm = false;
47728 if (identity_perm)
47730 if (!d->testing_p)
47731 emit_move_insn (d->target, d->op0);
47732 return true;
47734 else if (broadcast_perm && TARGET_AVX2)
47736 /* Use vpbroadcast{b,w,d}. */
47737 rtx (*gen) (rtx, rtx) = NULL;
47738 switch (d->vmode)
47740 case V64QImode:
47741 if (TARGET_AVX512BW)
47742 gen = gen_avx512bw_vec_dupv64qi_1;
47743 break;
47744 case V32QImode:
47745 gen = gen_avx2_pbroadcastv32qi_1;
47746 break;
47747 case V32HImode:
47748 if (TARGET_AVX512BW)
47749 gen = gen_avx512bw_vec_dupv32hi_1;
47750 break;
47751 case V16HImode:
47752 gen = gen_avx2_pbroadcastv16hi_1;
47753 break;
47754 case V16SImode:
47755 if (TARGET_AVX512F)
47756 gen = gen_avx512f_vec_dupv16si_1;
47757 break;
47758 case V8SImode:
47759 gen = gen_avx2_pbroadcastv8si_1;
47760 break;
47761 case V16QImode:
47762 gen = gen_avx2_pbroadcastv16qi;
47763 break;
47764 case V8HImode:
47765 gen = gen_avx2_pbroadcastv8hi;
47766 break;
47767 case V16SFmode:
47768 if (TARGET_AVX512F)
47769 gen = gen_avx512f_vec_dupv16sf_1;
47770 break;
47771 case V8SFmode:
47772 gen = gen_avx2_vec_dupv8sf_1;
47773 break;
47774 case V8DFmode:
47775 if (TARGET_AVX512F)
47776 gen = gen_avx512f_vec_dupv8df_1;
47777 break;
47778 case V8DImode:
47779 if (TARGET_AVX512F)
47780 gen = gen_avx512f_vec_dupv8di_1;
47781 break;
47782 /* For other modes prefer other shuffles this function creates. */
47783 default: break;
47785 if (gen != NULL)
47787 if (!d->testing_p)
47788 emit_insn (gen (d->target, d->op0));
47789 return true;
47793 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47794 return true;
47796 /* There are plenty of patterns in sse.md that are written for
47797 SEL+CONCAT and are not replicated for a single op. Perhaps
47798 that should be changed, to avoid the nastiness here. */
47800 /* Recognize interleave style patterns, which means incrementing
47801 every other permutation operand. */
47802 for (i = 0; i < nelt; i += 2)
47804 perm2[i] = d->perm[i] & mask;
47805 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47807 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47808 d->testing_p))
47809 return true;
47811 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
47812 if (nelt >= 4)
47814 for (i = 0; i < nelt; i += 4)
47816 perm2[i + 0] = d->perm[i + 0] & mask;
47817 perm2[i + 1] = d->perm[i + 1] & mask;
47818 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47819 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47822 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47823 d->testing_p))
47824 return true;
47828 /* Finally, try the fully general two operand permute. */
47829 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47830 d->testing_p))
47831 return true;
47833 /* Recognize interleave style patterns with reversed operands. */
47834 if (!d->one_operand_p)
47836 for (i = 0; i < nelt; ++i)
47838 unsigned e = d->perm[i];
47839 if (e >= nelt)
47840 e -= nelt;
47841 else
47842 e += nelt;
47843 perm2[i] = e;
47846 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47847 d->testing_p))
47848 return true;
47851 /* Try the SSE4.1 blend variable merge instructions. */
47852 if (expand_vec_perm_blend (d))
47853 return true;
47855 /* Try one of the AVX vpermil variable permutations. */
47856 if (expand_vec_perm_vpermil (d))
47857 return true;
47859 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47860 vpshufb, vpermd, vpermps or vpermq variable permutation. */
47861 if (expand_vec_perm_pshufb (d))
47862 return true;
47864 /* Try the AVX2 vpalignr instruction. */
47865 if (expand_vec_perm_palignr (d, true))
47866 return true;
47868 /* Try the AVX512F vpermi2 instructions. */
47869 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47870 return true;
47872 return false;
47875 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
47876 in terms of a pair of pshuflw + pshufhw instructions. */
47878 static bool
47879 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47881 unsigned char perm2[MAX_VECT_LEN];
47882 unsigned i;
47883 bool ok;
47885 if (d->vmode != V8HImode || !d->one_operand_p)
47886 return false;
47888 /* The two permutations only operate in 64-bit lanes. */
47889 for (i = 0; i < 4; ++i)
47890 if (d->perm[i] >= 4)
47891 return false;
47892 for (i = 4; i < 8; ++i)
47893 if (d->perm[i] < 4)
47894 return false;
47896 if (d->testing_p)
47897 return true;
47899 /* Emit the pshuflw. */
47900 memcpy (perm2, d->perm, 4);
47901 for (i = 4; i < 8; ++i)
47902 perm2[i] = i;
47903 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47904 gcc_assert (ok);
47906 /* Emit the pshufhw. */
47907 memcpy (perm2 + 4, d->perm + 4, 4);
47908 for (i = 0; i < 4; ++i)
47909 perm2[i] = i;
47910 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47911 gcc_assert (ok);
47913 return true;
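/* Illustrative example: the V8HI permutation { 2 0 3 1 5 7 4 6 } keeps
   the low four selectors below 4 and the high four at or above 4, so
   it is emitted as
     pshuflw with { 2 0 3 1 4 5 6 7 }   (reorder the low quadword)
     pshufhw with { 0 1 2 3 5 7 4 6 }   (reorder the high quadword)
   each insn touching only its own 64-bit half.  */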
47916 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
47917 the permutation using the SSSE3 palignr instruction. This succeeds
47918 when all of the elements in PERM fit within one vector and we merely
47919 need to shift them down so that a single vector permutation has a
47920 chance to succeed. If SINGLE_INSN_ONLY_P, succeed only if
47921 the vpalignr instruction itself can perform the requested permutation. */
47923 static bool
47924 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47926 unsigned i, nelt = d->nelt;
47927 unsigned min, max, minswap, maxswap;
47928 bool in_order, ok, swap = false;
47929 rtx shift, target;
47930 struct expand_vec_perm_d dcopy;
47932 /* Even with AVX, palignr only operates on 128-bit vectors;
47933 with AVX2, palignr operates on both 128-bit lanes. */
47934 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47935 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47936 return false;
47938 min = 2 * nelt;
47939 max = 0;
47940 minswap = 2 * nelt;
47941 maxswap = 0;
47942 for (i = 0; i < nelt; ++i)
47944 unsigned e = d->perm[i];
47945 unsigned eswap = d->perm[i] ^ nelt;
47946 if (GET_MODE_SIZE (d->vmode) == 32)
47948 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47949 eswap = e ^ (nelt / 2);
47951 if (e < min)
47952 min = e;
47953 if (e > max)
47954 max = e;
47955 if (eswap < minswap)
47956 minswap = eswap;
47957 if (eswap > maxswap)
47958 maxswap = eswap;
47960 if (min == 0
47961 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47963 if (d->one_operand_p
47964 || minswap == 0
47965 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47966 ? nelt / 2 : nelt))
47967 return false;
47968 swap = true;
47969 min = minswap;
47970 max = maxswap;
47973 /* Given that we have SSSE3, we know we'll be able to implement the
47974 single operand permutation after the palignr with pshufb for
47975 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
47976 first. */
47977 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47978 return true;
47980 dcopy = *d;
47981 if (swap)
47983 dcopy.op0 = d->op1;
47984 dcopy.op1 = d->op0;
47985 for (i = 0; i < nelt; ++i)
47986 dcopy.perm[i] ^= nelt;
47989 in_order = true;
47990 for (i = 0; i < nelt; ++i)
47992 unsigned e = dcopy.perm[i];
47993 if (GET_MODE_SIZE (d->vmode) == 32
47994 && e >= nelt
47995 && (e & (nelt / 2 - 1)) < min)
47996 e = e - min - (nelt / 2);
47997 else
47998 e = e - min;
47999 if (e != i)
48000 in_order = false;
48001 dcopy.perm[i] = e;
48003 dcopy.one_operand_p = true;
48005 if (single_insn_only_p && !in_order)
48006 return false;
48008 /* For AVX2, test whether we can permute the result in one instruction. */
48009 if (d->testing_p)
48011 if (in_order)
48012 return true;
48013 dcopy.op1 = dcopy.op0;
48014 return expand_vec_perm_1 (&dcopy);
48017 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
48018 if (GET_MODE_SIZE (d->vmode) == 16)
48020 target = gen_reg_rtx (TImode);
48021 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
48022 gen_lowpart (TImode, dcopy.op0), shift));
48024 else
48026 target = gen_reg_rtx (V2TImode);
48027 emit_insn (gen_avx2_palignrv2ti (target,
48028 gen_lowpart (V2TImode, dcopy.op1),
48029 gen_lowpart (V2TImode, dcopy.op0),
48030 shift));
48033 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
48035 /* Test for the degenerate case where the alignment by itself
48036 produces the desired permutation. */
48037 if (in_order)
48039 emit_move_insn (d->target, dcopy.op0);
48040 return true;
48043 ok = expand_vec_perm_1 (&dcopy);
48044 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
48046 return ok;
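/* Illustrative example: the two-operand V8HI permutation
   { 3 4 5 6 7 8 9 10 } selects a contiguous window starting at
   element 3 (min == 3), so the code above emits a palignr that shifts
   the concatenated operands down by three elements (48 bits); the
   remaining single-operand permutation is then the identity, i.e. the
   in_order case, and the shifted value is simply copied to the target.  */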
48049 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48050 the permutation using the SSE4_1 pblendv instruction. Potentially
48051 reduces the permutation from 2 pshufb insns and an or to 1 pshufb and a pblendv. */
48053 static bool
48054 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
48056 unsigned i, which, nelt = d->nelt;
48057 struct expand_vec_perm_d dcopy, dcopy1;
48058 machine_mode vmode = d->vmode;
48059 bool ok;
48061 /* Use the same checks as in expand_vec_perm_blend. */
48062 if (d->one_operand_p)
48063 return false;
48064 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
48066 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
48068 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
48070 else
48071 return false;
48073 /* Figure out which permutation elements do not stay in their
48074 respective lanes. */
48075 for (i = 0, which = 0; i < nelt; ++i)
48077 unsigned e = d->perm[i];
48078 if (e != i)
48079 which |= (e < nelt ? 1 : 2);
48081 /* We can pblend the part where elements do not stay in their
48082 respective lanes only when these elements are all in one
48083 half of a permutation.
48084 {0 1 8 3 4 5 9 7} is ok as 8 and 9 are not at their respective
48085 lanes, but both 8 and 9 >= 8;
48086 {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their
48087 respective lanes and 8 >= 8, but 2 is not. */
48088 if (which != 1 && which != 2)
48089 return false;
48090 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
48091 return true;
48093 /* First we apply one operand permutation to the part where
48094 elements stay not in their respective lanes. */
48095 dcopy = *d;
48096 if (which == 2)
48097 dcopy.op0 = dcopy.op1 = d->op1;
48098 else
48099 dcopy.op0 = dcopy.op1 = d->op0;
48100 if (!d->testing_p)
48101 dcopy.target = gen_reg_rtx (vmode);
48102 dcopy.one_operand_p = true;
48104 for (i = 0; i < nelt; ++i)
48105 dcopy.perm[i] = d->perm[i] & (nelt - 1);
48107 ok = expand_vec_perm_1 (&dcopy);
48108 if (GET_MODE_SIZE (vmode) != 16 && !ok)
48109 return false;
48110 else
48111 gcc_assert (ok);
48112 if (d->testing_p)
48113 return true;
48115 /* Next we put permuted elements into their positions. */
48116 dcopy1 = *d;
48117 if (which == 2)
48118 dcopy1.op1 = dcopy.target;
48119 else
48120 dcopy1.op0 = dcopy.target;
48122 for (i = 0; i < nelt; ++i)
48123 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
48125 ok = expand_vec_perm_blend (&dcopy1);
48126 gcc_assert (ok);
48128 return true;
48131 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
48133 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
48134 a two vector permutation into a single vector permutation by using
48135 an interleave operation to merge the vectors. */
48137 static bool
48138 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
48140 struct expand_vec_perm_d dremap, dfinal;
48141 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
48142 unsigned HOST_WIDE_INT contents;
48143 unsigned char remap[2 * MAX_VECT_LEN];
48144 rtx_insn *seq;
48145 bool ok, same_halves = false;
48147 if (GET_MODE_SIZE (d->vmode) == 16)
48149 if (d->one_operand_p)
48150 return false;
48152 else if (GET_MODE_SIZE (d->vmode) == 32)
48154 if (!TARGET_AVX)
48155 return false;
48156 /* For 32-byte modes allow even d->one_operand_p.
48157 The lack of cross-lane shuffling in some instructions
48158 might prevent a single insn shuffle. */
48159 dfinal = *d;
48160 dfinal.testing_p = true;
48161 /* If expand_vec_perm_interleave3 can expand this into
48162 a 3 insn sequence, give up and let it be expanded as
48163 a 3 insn sequence. While that is one insn longer,
48164 it doesn't need a memory operand, and in the common
48165 case where the interleave low and high permutations
48166 with the same operands are adjacent it needs only 4 insns
48167 for both after CSE. */
48168 if (expand_vec_perm_interleave3 (&dfinal))
48169 return false;
48171 else
48172 return false;
48174 /* Examine from whence the elements come. */
48175 contents = 0;
48176 for (i = 0; i < nelt; ++i)
48177 contents |= HOST_WIDE_INT_1U << d->perm[i];
48179 memset (remap, 0xff, sizeof (remap));
48180 dremap = *d;
48182 if (GET_MODE_SIZE (d->vmode) == 16)
48184 unsigned HOST_WIDE_INT h1, h2, h3, h4;
48186 /* Split the two input vectors into 4 halves. */
48187 h1 = (HOST_WIDE_INT_1U << nelt2) - 1;
48188 h2 = h1 << nelt2;
48189 h3 = h2 << nelt2;
48190 h4 = h3 << nelt2;
48192 /* If the elements come from the low halves, use interleave low; similarly
48193 use interleave high for the high halves. If the elements come from
48194 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
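/* For instance, the V4SImode permutation { 1 4 0 5 } draws only on the
   low halves of the two operands, so a punpckldq first produces
   { 0 4 1 5 } and the remaining single-insn shuffle { 2 1 0 3 } of that
   result gives the requested order.  */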
48195 if ((contents & (h1 | h3)) == contents)
48197 /* punpckl* */
48198 for (i = 0; i < nelt2; ++i)
48200 remap[i] = i * 2;
48201 remap[i + nelt] = i * 2 + 1;
48202 dremap.perm[i * 2] = i;
48203 dremap.perm[i * 2 + 1] = i + nelt;
48205 if (!TARGET_SSE2 && d->vmode == V4SImode)
48206 dremap.vmode = V4SFmode;
48208 else if ((contents & (h2 | h4)) == contents)
48210 /* punpckh* */
48211 for (i = 0; i < nelt2; ++i)
48213 remap[i + nelt2] = i * 2;
48214 remap[i + nelt + nelt2] = i * 2 + 1;
48215 dremap.perm[i * 2] = i + nelt2;
48216 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
48218 if (!TARGET_SSE2 && d->vmode == V4SImode)
48219 dremap.vmode = V4SFmode;
48221 else if ((contents & (h1 | h4)) == contents)
48223 /* shufps */
48224 for (i = 0; i < nelt2; ++i)
48226 remap[i] = i;
48227 remap[i + nelt + nelt2] = i + nelt2;
48228 dremap.perm[i] = i;
48229 dremap.perm[i + nelt2] = i + nelt + nelt2;
48231 if (nelt != 4)
48233 /* shufpd */
48234 dremap.vmode = V2DImode;
48235 dremap.nelt = 2;
48236 dremap.perm[0] = 0;
48237 dremap.perm[1] = 3;
48240 else if ((contents & (h2 | h3)) == contents)
48242 /* shufps */
48243 for (i = 0; i < nelt2; ++i)
48245 remap[i + nelt2] = i;
48246 remap[i + nelt] = i + nelt2;
48247 dremap.perm[i] = i + nelt2;
48248 dremap.perm[i + nelt2] = i + nelt;
48250 if (nelt != 4)
48252 /* shufpd */
48253 dremap.vmode = V2DImode;
48254 dremap.nelt = 2;
48255 dremap.perm[0] = 1;
48256 dremap.perm[1] = 2;
48259 else
48260 return false;
48262 else
48264 unsigned int nelt4 = nelt / 4, nzcnt = 0;
48265 unsigned HOST_WIDE_INT q[8];
48266 unsigned int nonzero_halves[4];
48268 /* Split the two input vectors into 8 quarters. */
48269 q[0] = (HOST_WIDE_INT_1U << nelt4) - 1;
48270 for (i = 1; i < 8; ++i)
48271 q[i] = q[0] << (nelt4 * i);
48272 for (i = 0; i < 4; ++i)
48273 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
48275 nonzero_halves[nzcnt] = i;
48276 ++nzcnt;
48279 if (nzcnt == 1)
48281 gcc_assert (d->one_operand_p);
48282 nonzero_halves[1] = nonzero_halves[0];
48283 same_halves = true;
48285 else if (d->one_operand_p)
48287 gcc_assert (nonzero_halves[0] == 0);
48288 gcc_assert (nonzero_halves[1] == 1);
48291 if (nzcnt <= 2)
48293 if (d->perm[0] / nelt2 == nonzero_halves[1])
48295 /* Attempt to increase the likelihood that dfinal
48296 shuffle will be intra-lane. */
48297 std::swap (nonzero_halves[0], nonzero_halves[1]);
48300 /* vperm2f128 or vperm2i128. */
48301 for (i = 0; i < nelt2; ++i)
48303 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
48304 remap[i + nonzero_halves[0] * nelt2] = i;
48305 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
48306 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
48309 if (d->vmode != V8SFmode
48310 && d->vmode != V4DFmode
48311 && d->vmode != V8SImode)
48313 dremap.vmode = V8SImode;
48314 dremap.nelt = 8;
48315 for (i = 0; i < 4; ++i)
48317 dremap.perm[i] = i + nonzero_halves[0] * 4;
48318 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
48322 else if (d->one_operand_p)
48323 return false;
48324 else if (TARGET_AVX2
48325 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
48327 /* vpunpckl* */
48328 for (i = 0; i < nelt4; ++i)
48330 remap[i] = i * 2;
48331 remap[i + nelt] = i * 2 + 1;
48332 remap[i + nelt2] = i * 2 + nelt2;
48333 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
48334 dremap.perm[i * 2] = i;
48335 dremap.perm[i * 2 + 1] = i + nelt;
48336 dremap.perm[i * 2 + nelt2] = i + nelt2;
48337 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
48340 else if (TARGET_AVX2
48341 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
48343 /* vpunpckh* */
48344 for (i = 0; i < nelt4; ++i)
48346 remap[i + nelt4] = i * 2;
48347 remap[i + nelt + nelt4] = i * 2 + 1;
48348 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
48349 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
48350 dremap.perm[i * 2] = i + nelt4;
48351 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48352 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48353 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48356 else
48357 return false;
48360 /* Use the remapping array set up above to move the elements from their
48361 swizzled locations into their final destinations. */
48362 dfinal = *d;
48363 for (i = 0; i < nelt; ++i)
48365 unsigned e = remap[d->perm[i]];
48366 gcc_assert (e < nelt);
48367 /* If same_halves is true, both halves of the remapped vector are the
48368 same. Avoid cross-lane accesses if possible. */
48369 if (same_halves && i >= nelt2)
48371 gcc_assert (e < nelt2);
48372 dfinal.perm[i] = e + nelt2;
48374 else
48375 dfinal.perm[i] = e;
48377 if (!d->testing_p)
48379 dremap.target = gen_reg_rtx (dremap.vmode);
48380 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48382 dfinal.op1 = dfinal.op0;
48383 dfinal.one_operand_p = true;
48385 /* Test if the final remap can be done with a single insn. For V4SFmode or
48386 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
48387 start_sequence ();
48388 ok = expand_vec_perm_1 (&dfinal);
48389 seq = get_insns ();
48390 end_sequence ();
48392 if (!ok)
48393 return false;
48395 if (d->testing_p)
48396 return true;
48398 if (dremap.vmode != dfinal.vmode)
48400 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48401 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48404 ok = expand_vec_perm_1 (&dremap);
48405 gcc_assert (ok);
48407 emit_insn (seq);
48408 return true;
48411 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48412 a single vector cross-lane permutation into vpermq followed
48413 by any of the single insn permutations. */
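/* For instance, a V32QImode permutation that swaps the two 128-bit lanes
   and then rearranges bytes within each lane is handled by a vpermq with
   the { 2 3 0 1 } selector followed by an in-lane vpshufb.  */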
48415 static bool
48416 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48418 struct expand_vec_perm_d dremap, dfinal;
48419 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48420 unsigned contents[2];
48421 bool ok;
48423 if (!(TARGET_AVX2
48424 && (d->vmode == V32QImode || d->vmode == V16HImode)
48425 && d->one_operand_p))
48426 return false;
48428 contents[0] = 0;
48429 contents[1] = 0;
48430 for (i = 0; i < nelt2; ++i)
48432 contents[0] |= 1u << (d->perm[i] / nelt4);
48433 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48436 for (i = 0; i < 2; ++i)
48438 unsigned int cnt = 0;
48439 for (j = 0; j < 4; ++j)
48440 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48441 return false;
48444 if (d->testing_p)
48445 return true;
48447 dremap = *d;
48448 dremap.vmode = V4DImode;
48449 dremap.nelt = 4;
48450 dremap.target = gen_reg_rtx (V4DImode);
48451 dremap.op0 = gen_lowpart (V4DImode, d->op0);
48452 dremap.op1 = dremap.op0;
48453 dremap.one_operand_p = true;
48454 for (i = 0; i < 2; ++i)
48456 unsigned int cnt = 0;
48457 for (j = 0; j < 4; ++j)
48458 if ((contents[i] & (1u << j)) != 0)
48459 dremap.perm[2 * i + cnt++] = j;
48460 for (; cnt < 2; ++cnt)
48461 dremap.perm[2 * i + cnt] = 0;
48464 dfinal = *d;
48465 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48466 dfinal.op1 = dfinal.op0;
48467 dfinal.one_operand_p = true;
48468 for (i = 0, j = 0; i < nelt; ++i)
48470 if (i == nelt2)
48471 j = 2;
48472 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48473 if ((d->perm[i] / nelt4) == dremap.perm[j])
48475 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48476 dfinal.perm[i] |= nelt4;
48477 else
48478 gcc_unreachable ();
48481 ok = expand_vec_perm_1 (&dremap);
48482 gcc_assert (ok);
48484 ok = expand_vec_perm_1 (&dfinal);
48485 gcc_assert (ok);
48487 return true;
48490 /* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
48491 a vector permutation using two instructions, vperm2f128 resp.
48492 vperm2i128 followed by any single in-lane permutation. */
48494 static bool
48495 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48497 struct expand_vec_perm_d dfirst, dsecond;
48498 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48499 bool ok;
48501 if (!TARGET_AVX
48502 || GET_MODE_SIZE (d->vmode) != 32
48503 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48504 return false;
48506 dsecond = *d;
48507 dsecond.one_operand_p = false;
48508 dsecond.testing_p = true;
48510 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48511 immediate. For perm < 16 the second permutation uses
48512 d->op0 as first operand, for perm >= 16 it uses d->op1
48513 as first operand. The second operand is the result of
48514 vperm2[fi]128. */
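/* For example, perm == 6 encodes lane 2 for the low half and lane 1 for
   the high half, i.e. a vperm2[fi]128 with immediate
   ((6 << 2) | 6) & 0x33 == 0x12, which for V4DFmode produces { 45 23 }.  */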
48515 for (perm = 0; perm < 32; perm++)
48517 /* Ignore permutations which do not move anything cross-lane. */
48518 if (perm < 16)
48520 /* The second shuffle for e.g. V4DFmode has
48521 0123 and ABCD operands.
48522 Ignore AB23, as 23 is already in the second lane
48523 of the first operand. */
48524 if ((perm & 0xc) == (1 << 2)) continue;
48525 /* And 01CD, as 01 is in the first lane of the first
48526 operand. */
48527 if ((perm & 3) == 0) continue;
48528 /* And 4567, as then the vperm2[fi]128 doesn't change
48529 anything on the original 4567 second operand. */
48530 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48532 else
48534 /* The second shuffle for e.g. V4DFmode has
48535 4567 and ABCD operands.
48536 Ignore AB67, as 67 is already in the second lane
48537 of the first operand. */
48538 if ((perm & 0xc) == (3 << 2)) continue;
48539 /* And 45CD, as 45 is in the first lane of the first
48540 operand. */
48541 if ((perm & 3) == 2) continue;
48542 /* And 0123, as then the vperm2[fi]128 doesn't change
48543 anything on the original 0123 first operand. */
48544 if ((perm & 0xf) == (1 << 2)) continue;
48547 for (i = 0; i < nelt; i++)
48549 j = d->perm[i] / nelt2;
48550 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48551 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48552 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48553 dsecond.perm[i] = d->perm[i] & (nelt - 1);
48554 else
48555 break;
48558 if (i == nelt)
48560 start_sequence ();
48561 ok = expand_vec_perm_1 (&dsecond);
48562 end_sequence ();
48564 else
48565 ok = false;
48567 if (ok)
48569 if (d->testing_p)
48570 return true;
48572 /* Found a usable second shuffle. dfirst will be
48573 vperm2f128 on d->op0 and d->op1. */
48574 dsecond.testing_p = false;
48575 dfirst = *d;
48576 dfirst.target = gen_reg_rtx (d->vmode);
48577 for (i = 0; i < nelt; i++)
48578 dfirst.perm[i] = (i & (nelt2 - 1))
48579 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48581 canonicalize_perm (&dfirst);
48582 ok = expand_vec_perm_1 (&dfirst);
48583 gcc_assert (ok);
48585 /* And dsecond is some single insn shuffle, taking
48586 d->op0 and result of vperm2f128 (if perm < 16) or
48587 d->op1 and result of vperm2f128 (otherwise). */
48588 if (perm >= 16)
48589 dsecond.op0 = dsecond.op1;
48590 dsecond.op1 = dfirst.target;
48592 ok = expand_vec_perm_1 (&dsecond);
48593 gcc_assert (ok);
48595 return true;
48598 /* For one operand, the only useful vperm2f128 permutation is 0x01
48599 aka lanes swap. */
48600 if (d->one_operand_p)
48601 return false;
48604 return false;
48607 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
48608 a two vector permutation using 2 intra-lane interleave insns
48609 and cross-lane shuffle for 32-byte vectors. */
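/* For instance, the full V8SImode interleave-low permutation
   { 0 8 1 9 2 10 3 11 } matches the checks below with d->perm[0] == 0 and
   is emitted through the vec_interleave_lowv8si expander.  */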
48611 static bool
48612 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48614 unsigned i, nelt;
48615 rtx (*gen) (rtx, rtx, rtx);
48617 if (d->one_operand_p)
48618 return false;
48619 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48621 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48623 else
48624 return false;
48626 nelt = d->nelt;
48627 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48628 return false;
48629 for (i = 0; i < nelt; i += 2)
48630 if (d->perm[i] != d->perm[0] + i / 2
48631 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48632 return false;
48634 if (d->testing_p)
48635 return true;
48637 switch (d->vmode)
48639 case V32QImode:
48640 if (d->perm[0])
48641 gen = gen_vec_interleave_highv32qi;
48642 else
48643 gen = gen_vec_interleave_lowv32qi;
48644 break;
48645 case V16HImode:
48646 if (d->perm[0])
48647 gen = gen_vec_interleave_highv16hi;
48648 else
48649 gen = gen_vec_interleave_lowv16hi;
48650 break;
48651 case V8SImode:
48652 if (d->perm[0])
48653 gen = gen_vec_interleave_highv8si;
48654 else
48655 gen = gen_vec_interleave_lowv8si;
48656 break;
48657 case V4DImode:
48658 if (d->perm[0])
48659 gen = gen_vec_interleave_highv4di;
48660 else
48661 gen = gen_vec_interleave_lowv4di;
48662 break;
48663 case V8SFmode:
48664 if (d->perm[0])
48665 gen = gen_vec_interleave_highv8sf;
48666 else
48667 gen = gen_vec_interleave_lowv8sf;
48668 break;
48669 case V4DFmode:
48670 if (d->perm[0])
48671 gen = gen_vec_interleave_highv4df;
48672 else
48673 gen = gen_vec_interleave_lowv4df;
48674 break;
48675 default:
48676 gcc_unreachable ();
48679 emit_insn (gen (d->target, d->op0, d->op1));
48680 return true;
48683 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
48684 a single vector permutation using a single intra-lane vector
48685 permutation, vperm2f128 swapping the lanes and vblend* insn blending
48686 the non-swapped and swapped vectors together. */
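/* For example, for the one-operand V4DFmode permutation { 0 3 2 1 } the
   intra-lane shuffle (dfirst) is the identity, the lane swap (dsecond)
   yields { 2 3 0 1 }, and a vblendpd with mask 0b1010 merges the two into
   { 0 3 2 1 }.  */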
48688 static bool
48689 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48691 struct expand_vec_perm_d dfirst, dsecond;
48692 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48693 rtx_insn *seq;
48694 bool ok;
48695 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48697 if (!TARGET_AVX
48698 || TARGET_AVX2
48699 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48700 || !d->one_operand_p)
48701 return false;
48703 dfirst = *d;
48704 for (i = 0; i < nelt; i++)
48705 dfirst.perm[i] = 0xff;
48706 for (i = 0, msk = 0; i < nelt; i++)
48708 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48709 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48710 return false;
48711 dfirst.perm[j] = d->perm[i];
48712 if (j != i)
48713 msk |= (1 << i);
48715 for (i = 0; i < nelt; i++)
48716 if (dfirst.perm[i] == 0xff)
48717 dfirst.perm[i] = i;
48719 if (!d->testing_p)
48720 dfirst.target = gen_reg_rtx (dfirst.vmode);
48722 start_sequence ();
48723 ok = expand_vec_perm_1 (&dfirst);
48724 seq = get_insns ();
48725 end_sequence ();
48727 if (!ok)
48728 return false;
48730 if (d->testing_p)
48731 return true;
48733 emit_insn (seq);
48735 dsecond = *d;
48736 dsecond.op0 = dfirst.target;
48737 dsecond.op1 = dfirst.target;
48738 dsecond.one_operand_p = true;
48739 dsecond.target = gen_reg_rtx (dsecond.vmode);
48740 for (i = 0; i < nelt; i++)
48741 dsecond.perm[i] = i ^ nelt2;
48743 ok = expand_vec_perm_1 (&dsecond);
48744 gcc_assert (ok);
48746 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48747 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48748 return true;
48751 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
48752 permutation using two vperm2f128, followed by a vshufpd insn blending
48753 the two vectors together. */
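/* For example, for d->perm == { 2 4 6 1 } this produces
   dfirst == { 2 3 6 7 }, dsecond == { 4 5 0 1 } and a final vshufpd
   selector of { 0 4 2 7 }, which picks out exactly the requested
   elements.  */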
48755 static bool
48756 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48758 struct expand_vec_perm_d dfirst, dsecond, dthird;
48759 bool ok;
48761 if (!TARGET_AVX || (d->vmode != V4DFmode))
48762 return false;
48764 if (d->testing_p)
48765 return true;
48767 dfirst = *d;
48768 dsecond = *d;
48769 dthird = *d;
48771 dfirst.perm[0] = (d->perm[0] & ~1);
48772 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48773 dfirst.perm[2] = (d->perm[2] & ~1);
48774 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48775 dsecond.perm[0] = (d->perm[1] & ~1);
48776 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48777 dsecond.perm[2] = (d->perm[3] & ~1);
48778 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48779 dthird.perm[0] = (d->perm[0] % 2);
48780 dthird.perm[1] = (d->perm[1] % 2) + 4;
48781 dthird.perm[2] = (d->perm[2] % 2) + 2;
48782 dthird.perm[3] = (d->perm[3] % 2) + 6;
48784 dfirst.target = gen_reg_rtx (dfirst.vmode);
48785 dsecond.target = gen_reg_rtx (dsecond.vmode);
48786 dthird.op0 = dfirst.target;
48787 dthird.op1 = dsecond.target;
48788 dthird.one_operand_p = false;
48790 canonicalize_perm (&dfirst);
48791 canonicalize_perm (&dsecond);
48793 ok = expand_vec_perm_1 (&dfirst)
48794 && expand_vec_perm_1 (&dsecond)
48795 && expand_vec_perm_1 (&dthird);
48797 gcc_assert (ok);
48799 return true;
48802 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
48803 permutation with two pshufb insns and an ior. We should have already
48804 failed all two instruction sequences. */
48806 static bool
48807 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48809 rtx rperm[2][16], vperm, l, h, op, m128;
48810 unsigned int i, nelt, eltsz;
48812 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48813 return false;
48814 gcc_assert (!d->one_operand_p);
48816 if (d->testing_p)
48817 return true;
48819 nelt = d->nelt;
48820 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48822 /* Generate two permutation masks. If the required element is within
48823 the given vector it is shuffled into the proper lane. If the required
48824 element is in the other vector, force a zero into the lane by setting
48825 bit 7 in the permutation mask. */
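/* E.g. for a V16QImode permutation starting { 0 16 2 18 ... } the first
   mask starts { 0, -128, 2, -128, ... } and the second
   { -128, 0, -128, 2, ... }; the two pshufb results are then ior'ed
   together.  */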
48826 m128 = GEN_INT (-128);
48827 for (i = 0; i < nelt; ++i)
48829 unsigned j, e = d->perm[i];
48830 unsigned which = (e >= nelt);
48831 if (e >= nelt)
48832 e -= nelt;
48834 for (j = 0; j < eltsz; ++j)
48836 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48837 rperm[1-which][i*eltsz + j] = m128;
48841 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48842 vperm = force_reg (V16QImode, vperm);
48844 l = gen_reg_rtx (V16QImode);
48845 op = gen_lowpart (V16QImode, d->op0);
48846 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48848 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48849 vperm = force_reg (V16QImode, vperm);
48851 h = gen_reg_rtx (V16QImode);
48852 op = gen_lowpart (V16QImode, d->op1);
48853 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48855 op = d->target;
48856 if (d->vmode != V16QImode)
48857 op = gen_reg_rtx (V16QImode);
48858 emit_insn (gen_iorv16qi3 (op, l, h));
48859 if (op != d->target)
48860 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48862 return true;
48865 /* Implement an arbitrary permutation of a single V32QImode or V16HImode
48866 operand with two vpshufb insns, a vpermq and a vpor. We should have already
48867 failed all two or three instruction sequences. */
48869 static bool
48870 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48872 rtx rperm[2][32], vperm, l, h, hp, op, m128;
48873 unsigned int i, nelt, eltsz;
48875 if (!TARGET_AVX2
48876 || !d->one_operand_p
48877 || (d->vmode != V32QImode && d->vmode != V16HImode))
48878 return false;
48880 if (d->testing_p)
48881 return true;
48883 nelt = d->nelt;
48884 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48886 /* Generate two permutation masks. If the required element is within
48887 the same lane, it is shuffled in. If the required element is from the
48888 other lane, force a zero by setting bit 7 in the permutation mask.
48889 The other mask has a non-negative element wherever an element
48890 is requested from the other lane, but also moved to the other lane,
48891 so that the result of vpshufb can have the two V2TImode halves
48892 swapped. */
48893 m128 = GEN_INT (-128);
48894 for (i = 0; i < nelt; ++i)
48896 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48897 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48899 for (j = 0; j < eltsz; ++j)
48901 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48902 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48906 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48907 vperm = force_reg (V32QImode, vperm);
48909 h = gen_reg_rtx (V32QImode);
48910 op = gen_lowpart (V32QImode, d->op0);
48911 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48913 /* Swap the 128-bit lanes of h into hp. */
48914 hp = gen_reg_rtx (V4DImode);
48915 op = gen_lowpart (V4DImode, h);
48916 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48917 const1_rtx));
48919 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48920 vperm = force_reg (V32QImode, vperm);
48922 l = gen_reg_rtx (V32QImode);
48923 op = gen_lowpart (V32QImode, d->op0);
48924 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48926 op = d->target;
48927 if (d->vmode != V32QImode)
48928 op = gen_reg_rtx (V32QImode);
48929 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48930 if (op != d->target)
48931 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48933 return true;
48936 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
48937 and extract-odd permutations of two V32QImode or V16HImode operands
48938 with two vpshufb insns, a vpor and a vpermq. We should have already
48939 failed all two or three instruction sequences. */
48941 static bool
48942 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48944 rtx rperm[2][32], vperm, l, h, ior, op, m128;
48945 unsigned int i, nelt, eltsz;
48947 if (!TARGET_AVX2
48948 || d->one_operand_p
48949 || (d->vmode != V32QImode && d->vmode != V16HImode))
48950 return false;
48952 for (i = 0; i < d->nelt; ++i)
48953 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48954 return false;
48956 if (d->testing_p)
48957 return true;
48959 nelt = d->nelt;
48960 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48962 /* Generate two permutation masks. In the first permutation mask
48963 the first quarter will contain indexes for the first half
48964 of the op0, the second quarter will contain bit 7 set, third quarter
48965 will contain indexes for the second half of the op0 and the
48966 last quarter bit 7 set. In the second permutation mask
48967 the first quarter will contain bit 7 set, the second quarter
48968 indexes for the first half of the op1, the third quarter bit 7 set
48969 and last quarter indexes for the second half of the op1.
48970 I.e. the first mask e.g. for V32QImode extract even will be:
48971 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48972 (all values masked with 0xf except for -128) and second mask
48973 for extract even will be
48974 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
48975 m128 = GEN_INT (-128);
48976 for (i = 0; i < nelt; ++i)
48978 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48979 unsigned which = d->perm[i] >= nelt;
48980 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48982 for (j = 0; j < eltsz; ++j)
48984 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48985 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48989 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48990 vperm = force_reg (V32QImode, vperm);
48992 l = gen_reg_rtx (V32QImode);
48993 op = gen_lowpart (V32QImode, d->op0);
48994 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48996 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48997 vperm = force_reg (V32QImode, vperm);
48999 h = gen_reg_rtx (V32QImode);
49000 op = gen_lowpart (V32QImode, d->op1);
49001 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
49003 ior = gen_reg_rtx (V32QImode);
49004 emit_insn (gen_iorv32qi3 (ior, l, h));
49006 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
49007 op = gen_reg_rtx (V4DImode);
49008 ior = gen_lowpart (V4DImode, ior);
49009 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
49010 const1_rtx, GEN_INT (3)));
49011 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49013 return true;
49016 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
49017 and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
49018 with two "and" and "pack" or two "shift" and "pack" insns. We should
49019 have already failed all two instruction sequences. */
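/* For instance, the V16QImode extract-even permutation { 0 2 4 ... 30 }
   masks both operands with 0x00ff in V8HImode and then issues packuswb;
   the extract-odd variant instead shifts each word right by 8 before the
   pack.  */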
49021 static bool
49022 expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
49024 rtx op, dop0, dop1, t, rperm[16];
49025 unsigned i, odd, c, s, nelt = d->nelt;
49026 bool end_perm = false;
49027 machine_mode half_mode;
49028 rtx (*gen_and) (rtx, rtx, rtx);
49029 rtx (*gen_pack) (rtx, rtx, rtx);
49030 rtx (*gen_shift) (rtx, rtx, rtx);
49032 if (d->one_operand_p)
49033 return false;
49035 switch (d->vmode)
49037 case V8HImode:
49038 /* Required for "pack". */
49039 if (!TARGET_SSE4_1)
49040 return false;
49041 c = 0xffff;
49042 s = 16;
49043 half_mode = V4SImode;
49044 gen_and = gen_andv4si3;
49045 gen_pack = gen_sse4_1_packusdw;
49046 gen_shift = gen_lshrv4si3;
49047 break;
49048 case V16QImode:
49049 /* No check as all instructions are SSE2. */
49050 c = 0xff;
49051 s = 8;
49052 half_mode = V8HImode;
49053 gen_and = gen_andv8hi3;
49054 gen_pack = gen_sse2_packuswb;
49055 gen_shift = gen_lshrv8hi3;
49056 break;
49057 case V16HImode:
49058 if (!TARGET_AVX2)
49059 return false;
49060 c = 0xffff;
49061 s = 16;
49062 half_mode = V8SImode;
49063 gen_and = gen_andv8si3;
49064 gen_pack = gen_avx2_packusdw;
49065 gen_shift = gen_lshrv8si3;
49066 end_perm = true;
49067 break;
49068 case V32QImode:
49069 if (!TARGET_AVX2)
49070 return false;
49071 c = 0xff;
49072 s = 8;
49073 half_mode = V16HImode;
49074 gen_and = gen_andv16hi3;
49075 gen_pack = gen_avx2_packuswb;
49076 gen_shift = gen_lshrv16hi3;
49077 end_perm = true;
49078 break;
49079 default:
49080 /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
49081 general shuffles. */
49082 return false;
49085 /* Check that permutation is even or odd. */
49086 odd = d->perm[0];
49087 if (odd > 1)
49088 return false;
49090 for (i = 1; i < nelt; ++i)
49091 if (d->perm[i] != 2 * i + odd)
49092 return false;
49094 if (d->testing_p)
49095 return true;
49097 dop0 = gen_reg_rtx (half_mode);
49098 dop1 = gen_reg_rtx (half_mode);
49099 if (odd == 0)
49101 for (i = 0; i < nelt / 2; i++)
49102 rperm[i] = GEN_INT (c);
49103 t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
49104 t = force_reg (half_mode, t);
49105 emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
49106 emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
49108 else
49110 emit_insn (gen_shift (dop0,
49111 gen_lowpart (half_mode, d->op0),
49112 GEN_INT (s)));
49113 emit_insn (gen_shift (dop1,
49114 gen_lowpart (half_mode, d->op1),
49115 GEN_INT (s)));
49117 /* For the AVX2 256-bit case we need to permute the pack result. */
49118 if (TARGET_AVX2 && end_perm)
49120 op = gen_reg_rtx (d->vmode);
49121 t = gen_reg_rtx (V4DImode);
49122 emit_insn (gen_pack (op, dop0, dop1));
49123 emit_insn (gen_avx2_permv4di_1 (t,
49124 gen_lowpart (V4DImode, op),
49125 const0_rtx,
49126 const2_rtx,
49127 const1_rtx,
49128 GEN_INT (3)));
49129 emit_move_insn (d->target, gen_lowpart (d->vmode, t));
49131 else
49132 emit_insn (gen_pack (d->target, dop0, dop1));
49134 return true;
49137 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
49138 and extract-odd permutations. */
49140 static bool
49141 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
49143 rtx t1, t2, t3, t4, t5;
49145 switch (d->vmode)
49147 case V4DFmode:
49148 if (d->testing_p)
49149 break;
49150 t1 = gen_reg_rtx (V4DFmode);
49151 t2 = gen_reg_rtx (V4DFmode);
49153 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49154 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
49155 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
49157 /* Now an unpck[lh]pd will produce the result required. */
49158 if (odd)
49159 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
49160 else
49161 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
49162 emit_insn (t3);
49163 break;
49165 case V8SFmode:
49167 int mask = odd ? 0xdd : 0x88;
49169 if (d->testing_p)
49170 break;
49171 t1 = gen_reg_rtx (V8SFmode);
49172 t2 = gen_reg_rtx (V8SFmode);
49173 t3 = gen_reg_rtx (V8SFmode);
49175 /* Shuffle within the 128-bit lanes to produce:
49176 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
49177 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
49178 GEN_INT (mask)));
49180 /* Shuffle the lanes around to produce:
49181 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
49182 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
49183 GEN_INT (0x3)));
49185 /* Shuffle within the 128-bit lanes to produce:
49186 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
49187 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
49189 /* Shuffle within the 128-bit lanes to produce:
49190 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
49191 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
49193 /* Shuffle the lanes around to produce:
49194 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
49195 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
49196 GEN_INT (0x20)));
49198 break;
49200 case V2DFmode:
49201 case V4SFmode:
49202 case V2DImode:
49203 case V4SImode:
49204 /* These are always directly implementable by expand_vec_perm_1. */
49205 gcc_unreachable ();
49207 case V8HImode:
49208 if (TARGET_SSE4_1)
49209 return expand_vec_perm_even_odd_pack (d);
49210 else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
49211 return expand_vec_perm_pshufb2 (d);
49212 else
49214 if (d->testing_p)
49215 break;
49216 /* We need 2*log2(N)-1 operations to achieve odd/even
49217 with interleave. */
49218 t1 = gen_reg_rtx (V8HImode);
49219 t2 = gen_reg_rtx (V8HImode);
49220 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
49221 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
49222 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
49223 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
49224 if (odd)
49225 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
49226 else
49227 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
49228 emit_insn (t3);
49230 break;
49232 case V16QImode:
49233 return expand_vec_perm_even_odd_pack (d);
49235 case V16HImode:
49236 case V32QImode:
49237 return expand_vec_perm_even_odd_pack (d);
49239 case V4DImode:
49240 if (!TARGET_AVX2)
49242 struct expand_vec_perm_d d_copy = *d;
49243 d_copy.vmode = V4DFmode;
49244 if (d->testing_p)
49245 d_copy.target = gen_lowpart (V4DFmode, d->target);
49246 else
49247 d_copy.target = gen_reg_rtx (V4DFmode);
49248 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
49249 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
49250 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49252 if (!d->testing_p)
49253 emit_move_insn (d->target,
49254 gen_lowpart (V4DImode, d_copy.target));
49255 return true;
49257 return false;
49260 if (d->testing_p)
49261 break;
49263 t1 = gen_reg_rtx (V4DImode);
49264 t2 = gen_reg_rtx (V4DImode);
49266 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
49267 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
49268 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
49270 /* Now a vpunpck[lh]qdq will produce the result required. */
49271 if (odd)
49272 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
49273 else
49274 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
49275 emit_insn (t3);
49276 break;
49278 case V8SImode:
49279 if (!TARGET_AVX2)
49281 struct expand_vec_perm_d d_copy = *d;
49282 d_copy.vmode = V8SFmode;
49283 if (d->testing_p)
49284 d_copy.target = gen_lowpart (V8SFmode, d->target);
49285 else
49286 d_copy.target = gen_reg_rtx (V8SFmode);
49287 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
49288 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
49289 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49291 if (!d->testing_p)
49292 emit_move_insn (d->target,
49293 gen_lowpart (V8SImode, d_copy.target));
49294 return true;
49296 return false;
49299 if (d->testing_p)
49300 break;
49302 t1 = gen_reg_rtx (V8SImode);
49303 t2 = gen_reg_rtx (V8SImode);
49304 t3 = gen_reg_rtx (V4DImode);
49305 t4 = gen_reg_rtx (V4DImode);
49306 t5 = gen_reg_rtx (V4DImode);
49308 /* Shuffle the lanes around into
49309 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
49310 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
49311 gen_lowpart (V4DImode, d->op1),
49312 GEN_INT (0x20)));
49313 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
49314 gen_lowpart (V4DImode, d->op1),
49315 GEN_INT (0x31)));
49317 /* Swap the 2nd and 3rd position in each lane into
49318 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
49319 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
49320 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49321 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
49322 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49324 /* Now a vpunpck[lh]qdq will produce
49325 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
49326 if (odd)
49327 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
49328 gen_lowpart (V4DImode, t2));
49329 else
49330 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
49331 gen_lowpart (V4DImode, t2));
49332 emit_insn (t3);
49333 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
49334 break;
49336 default:
49337 gcc_unreachable ();
49340 return true;
49343 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
49344 extract-even and extract-odd permutations. */
49346 static bool
49347 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
49349 unsigned i, odd, nelt = d->nelt;
49351 odd = d->perm[0];
49352 if (odd != 0 && odd != 1)
49353 return false;
49355 for (i = 1; i < nelt; ++i)
49356 if (d->perm[i] != 2 * i + odd)
49357 return false;
49359 return expand_vec_perm_even_odd_1 (d, odd);
49362 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
49363 permutations. We assume that expand_vec_perm_1 has already failed. */
49365 static bool
49366 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49368 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49369 machine_mode vmode = d->vmode;
49370 unsigned char perm2[4];
49371 rtx op0 = d->op0, dest;
49372 bool ok;
49374 switch (vmode)
49376 case V4DFmode:
49377 case V8SFmode:
49378 /* These are special-cased in sse.md so that we can optionally
49379 use the vbroadcast instruction. They expand to two insns
49380 if the input happens to be in a register. */
49381 gcc_unreachable ();
49383 case V2DFmode:
49384 case V2DImode:
49385 case V4SFmode:
49386 case V4SImode:
49387 /* These are always implementable using standard shuffle patterns. */
49388 gcc_unreachable ();
49390 case V8HImode:
49391 case V16QImode:
49392 /* These can be implemented via interleave. We save one insn by
49393 stopping once we have promoted to V4SImode and then use pshufd. */
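/* E.g. to broadcast element 5 of a V8HImode vector, one punpckhwd of the
   operand with itself gives { 4 4 5 5 6 6 7 7 }; viewed as V4SImode the
   { 5 5 } pair is element 1, which the final pshufd replicates.  */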
49394 if (d->testing_p)
49395 return true;
49398 rtx dest;
49399 rtx (*gen) (rtx, rtx, rtx)
49400 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49401 : gen_vec_interleave_lowv8hi;
49403 if (elt >= nelt2)
49405 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49406 : gen_vec_interleave_highv8hi;
49407 elt -= nelt2;
49409 nelt2 /= 2;
49411 dest = gen_reg_rtx (vmode);
49412 emit_insn (gen (dest, op0, op0));
49413 vmode = get_mode_wider_vector (vmode);
49414 op0 = gen_lowpart (vmode, dest);
49416 while (vmode != V4SImode);
49418 memset (perm2, elt, 4);
49419 dest = gen_reg_rtx (V4SImode);
49420 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49421 gcc_assert (ok);
49422 if (!d->testing_p)
49423 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49424 return true;
49426 case V64QImode:
49427 case V32QImode:
49428 case V16HImode:
49429 case V8SImode:
49430 case V4DImode:
49431 /* For AVX2 broadcasts of the first element vpbroadcast* or
49432 vpermq should be used by expand_vec_perm_1. */
49433 gcc_assert (!TARGET_AVX2 || d->perm[0]);
49434 return false;
49436 default:
49437 gcc_unreachable ();
49441 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
49442 broadcast permutations. */
49444 static bool
49445 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49447 unsigned i, elt, nelt = d->nelt;
49449 if (!d->one_operand_p)
49450 return false;
49452 elt = d->perm[0];
49453 for (i = 1; i < nelt; ++i)
49454 if (d->perm[i] != elt)
49455 return false;
49457 return expand_vec_perm_broadcast_1 (d);
49460 /* Implement arbitrary permutations of two V64QImode operands
49461 with 2 vpermi2w, 2 vpshufb and one vpor instruction. */
49462 static bool
49463 expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49465 if (!TARGET_AVX512BW || !(d->vmode == V64QImode))
49466 return false;
49468 if (d->testing_p)
49469 return true;
49471 struct expand_vec_perm_d ds[2];
49472 rtx rperm[128], vperm, target0, target1;
49473 unsigned int i, nelt;
49474 machine_mode vmode;
49476 nelt = d->nelt;
49477 vmode = V64QImode;
49479 for (i = 0; i < 2; i++)
49481 ds[i] = *d;
49482 ds[i].vmode = V32HImode;
49483 ds[i].nelt = 32;
49484 ds[i].target = gen_reg_rtx (V32HImode);
49485 ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49486 ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49489 /* Prepare permutations such that the first one takes care of
49490 putting the even bytes into the right positions or one position
49491 higher (ds[0]) and the second one takes care of
49492 putting the odd bytes into the right positions or one position
49493 lower (ds[1]). */
49495 for (i = 0; i < nelt; i++)
49497 ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49498 if (i & 1)
49500 rperm[i] = constm1_rtx;
49501 rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49503 else
49505 rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49506 rperm[i + 64] = constm1_rtx;
49510 bool ok = expand_vec_perm_1 (&ds[0]);
49511 gcc_assert (ok);
49512 ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49514 ok = expand_vec_perm_1 (&ds[1]);
49515 gcc_assert (ok);
49516 ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49518 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49519 vperm = force_reg (vmode, vperm);
49520 target0 = gen_reg_rtx (V64QImode);
49521 emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49523 vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49524 vperm = force_reg (vmode, vperm);
49525 target1 = gen_reg_rtx (V64QImode);
49526 emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49528 emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49529 return true;
49532 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
49533 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
49534 all the shorter instruction sequences. */
49536 static bool
49537 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49539 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49540 unsigned int i, nelt, eltsz;
49541 bool used[4];
49543 if (!TARGET_AVX2
49544 || d->one_operand_p
49545 || (d->vmode != V32QImode && d->vmode != V16HImode))
49546 return false;
49548 if (d->testing_p)
49549 return true;
49551 nelt = d->nelt;
49552 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49554 /* Generate 4 permutation masks. If the required element is within
49555 the same lane, it is shuffled in. If the required element is from the
49556 other lane, force a zero by setting bit 7 in the permutation mask.
49557 The other mask has a non-negative element wherever an element
49558 is requested from the other lane, but also moved to the other lane,
49559 so that the result of vpshufb can have the two V2TImode halves
49560 swapped. */
49561 m128 = GEN_INT (-128);
49562 for (i = 0; i < 32; ++i)
49564 rperm[0][i] = m128;
49565 rperm[1][i] = m128;
49566 rperm[2][i] = m128;
49567 rperm[3][i] = m128;
49569 used[0] = false;
49570 used[1] = false;
49571 used[2] = false;
49572 used[3] = false;
49573 for (i = 0; i < nelt; ++i)
49575 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49576 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49577 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49579 for (j = 0; j < eltsz; ++j)
49580 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49581 used[which] = true;
49584 for (i = 0; i < 2; ++i)
49586 if (!used[2 * i + 1])
49588 h[i] = NULL_RTX;
49589 continue;
49591 vperm = gen_rtx_CONST_VECTOR (V32QImode,
49592 gen_rtvec_v (32, rperm[2 * i + 1]));
49593 vperm = force_reg (V32QImode, vperm);
49594 h[i] = gen_reg_rtx (V32QImode);
49595 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49596 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49599 /* Swap the 128-bit lanes of h[X]. */
49600 for (i = 0; i < 2; ++i)
49602 if (h[i] == NULL_RTX)
49603 continue;
49604 op = gen_reg_rtx (V4DImode);
49605 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49606 const2_rtx, GEN_INT (3), const0_rtx,
49607 const1_rtx));
49608 h[i] = gen_lowpart (V32QImode, op);
49611 for (i = 0; i < 2; ++i)
49613 if (!used[2 * i])
49615 l[i] = NULL_RTX;
49616 continue;
49618 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49619 vperm = force_reg (V32QImode, vperm);
49620 l[i] = gen_reg_rtx (V32QImode);
49621 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49622 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49625 for (i = 0; i < 2; ++i)
49627 if (h[i] && l[i])
49629 op = gen_reg_rtx (V32QImode);
49630 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49631 l[i] = op;
49633 else if (h[i])
49634 l[i] = h[i];
49637 gcc_assert (l[0] && l[1]);
49638 op = d->target;
49639 if (d->vmode != V32QImode)
49640 op = gen_reg_rtx (V32QImode);
49641 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49642 if (op != d->target)
49643 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49644 return true;
49647 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49648 With all of the interface bits taken care of, perform the expansion
49649 in D and return true on success. */
49651 static bool
49652 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49654 /* Try a single instruction expansion. */
49655 if (expand_vec_perm_1 (d))
49656 return true;
49658 /* Try sequences of two instructions. */
49660 if (expand_vec_perm_pshuflw_pshufhw (d))
49661 return true;
49663 if (expand_vec_perm_palignr (d, false))
49664 return true;
49666 if (expand_vec_perm_interleave2 (d))
49667 return true;
49669 if (expand_vec_perm_broadcast (d))
49670 return true;
49672 if (expand_vec_perm_vpermq_perm_1 (d))
49673 return true;
49675 if (expand_vec_perm_vperm2f128 (d))
49676 return true;
49678 if (expand_vec_perm_pblendv (d))
49679 return true;
49681 /* Try sequences of three instructions. */
49683 if (expand_vec_perm_even_odd_pack (d))
49684 return true;
49686 if (expand_vec_perm_2vperm2f128_vshuf (d))
49687 return true;
49689 if (expand_vec_perm_pshufb2 (d))
49690 return true;
49692 if (expand_vec_perm_interleave3 (d))
49693 return true;
49695 if (expand_vec_perm_vperm2f128_vblend (d))
49696 return true;
49698 /* Try sequences of four instructions. */
49700 if (expand_vec_perm_vpshufb2_vpermq (d))
49701 return true;
49703 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49704 return true;
49706 if (expand_vec_perm_vpermi2_vpshub2 (d))
49707 return true;
49709 /* ??? Look for narrow permutations whose element orderings would
49710 allow the promotion to a wider mode. */
49712 /* ??? Look for sequences of interleave or a wider permute that place
49713 the data into the correct lanes for a half-vector shuffle like
49714 pshuf[lh]w or vpermilps. */
49716 /* ??? Look for sequences of interleave that produce the desired results.
49717 The combinatorics of punpck[lh] get pretty ugly... */
49719 if (expand_vec_perm_even_odd (d))
49720 return true;
49722 /* Even longer sequences. */
49723 if (expand_vec_perm_vpshufb4_vpermq2 (d))
49724 return true;
49726 return false;
49729 /* If a permutation only uses one operand, make it clear. Returns true
49730 if the permutation references both operands. */
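/* E.g. a V4SImode selector { 5 7 4 6 } references only the second
   operand, so it is folded to { 1 3 0 2 } applied to op1 alone,
   one_operand_p is set and the function returns false.  */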
49732 static bool
49733 canonicalize_perm (struct expand_vec_perm_d *d)
49735 int i, which, nelt = d->nelt;
49737 for (i = which = 0; i < nelt; ++i)
49738 which |= (d->perm[i] < nelt ? 1 : 2);
49740 d->one_operand_p = true;
49741 switch (which)
49743 default:
49744 gcc_unreachable();
49746 case 3:
49747 if (!rtx_equal_p (d->op0, d->op1))
49749 d->one_operand_p = false;
49750 break;
49752 /* The elements of PERM do not suggest that only the first operand
49753 is used, but both operands are identical. Allow easier matching
49754 of the permutation by folding the permutation into the single
49755 input vector. */
49756 /* FALLTHRU */
49758 case 2:
49759 for (i = 0; i < nelt; ++i)
49760 d->perm[i] &= nelt - 1;
49761 d->op0 = d->op1;
49762 break;
49764 case 1:
49765 d->op1 = d->op0;
49766 break;
49769 return (which == 3);
49772 bool
49773 ix86_expand_vec_perm_const (rtx operands[4])
49775 struct expand_vec_perm_d d;
49776 unsigned char perm[MAX_VECT_LEN];
49777 int i, nelt;
49778 bool two_args;
49779 rtx sel;
49781 d.target = operands[0];
49782 d.op0 = operands[1];
49783 d.op1 = operands[2];
49784 sel = operands[3];
49786 d.vmode = GET_MODE (d.target);
49787 gcc_assert (VECTOR_MODE_P (d.vmode));
49788 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49789 d.testing_p = false;
49791 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49792 gcc_assert (XVECLEN (sel, 0) == nelt);
49793 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
49795 for (i = 0; i < nelt; ++i)
49797 rtx e = XVECEXP (sel, 0, i);
49798 int ei = INTVAL (e) & (2 * nelt - 1);
49799 d.perm[i] = ei;
49800 perm[i] = ei;
49803 two_args = canonicalize_perm (&d);
49805 if (ix86_expand_vec_perm_const_1 (&d))
49806 return true;
49808 /* If the selector says both arguments are needed, but the operands are the
49809 same, the above tried to expand with one_operand_p and flattened selector.
49810 If that didn't work, retry without one_operand_p; we succeeded with that
49811 during testing. */
49812 if (two_args && d.one_operand_p)
49814 d.one_operand_p = false;
49815 memcpy (d.perm, perm, sizeof (perm));
49816 return ix86_expand_vec_perm_const_1 (&d);
49819 return false;
49822 /* Implement targetm.vectorize.vec_perm_const_ok. */
49824 static bool
49825 ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49826 const unsigned char *sel)
49828 struct expand_vec_perm_d d;
49829 unsigned int i, nelt, which;
49830 bool ret;
49832 d.vmode = vmode;
49833 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49834 d.testing_p = true;
49836 /* Given sufficient ISA support we can just return true here
49837 for selected vector modes. */
49838 switch (d.vmode)
49840 case V16SFmode:
49841 case V16SImode:
49842 case V8DImode:
49843 case V8DFmode:
49844 if (TARGET_AVX512F)
49845 /* All implementable with a single vpermi2 insn. */
49846 return true;
49847 break;
49848 case V32HImode:
49849 if (TARGET_AVX512BW)
49850 /* All implementable with a single vpermi2 insn. */
49851 return true;
49852 break;
49853 case V64QImode:
49854 if (TARGET_AVX512BW)
49855 /* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn. */
49856 return true;
49857 break;
49858 case V8SImode:
49859 case V8SFmode:
49860 case V4DFmode:
49861 case V4DImode:
49862 if (TARGET_AVX512VL)
49863 /* All implementable with a single vpermi2 insn. */
49864 return true;
49865 break;
49866 case V16HImode:
49867 if (TARGET_AVX2)
49868 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49869 return true;
49870 break;
49871 case V32QImode:
49872 if (TARGET_AVX2)
49873 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
49874 return true;
49875 break;
49876 case V4SImode:
49877 case V4SFmode:
49878 case V8HImode:
49879 case V16QImode:
49880 /* All implementable with a single vpperm insn. */
49881 if (TARGET_XOP)
49882 return true;
49883 /* All implementable with 2 pshufb + 1 ior. */
49884 if (TARGET_SSSE3)
49885 return true;
49886 break;
49887 case V2DImode:
49888 case V2DFmode:
49889 /* All implementable with shufpd or unpck[lh]pd. */
49890 return true;
49891 default:
49892 return false;
49895 /* Extract the values from the vector CST into the permutation
49896 array in D. */
49897 memcpy (d.perm, sel, nelt);
49898 for (i = which = 0; i < nelt; ++i)
49900 unsigned char e = d.perm[i];
49901 gcc_assert (e < 2 * nelt);
49902 which |= (e < nelt ? 1 : 2);
49905 /* For all elements from the second vector, fold the elements to the first. */
49906 if (which == 2)
49907 for (i = 0; i < nelt; ++i)
49908 d.perm[i] -= nelt;
49910 /* Check whether the mask can be applied to the vector type. */
49911 d.one_operand_p = (which != 3);
49913 /* Implementable with shufps or pshufd. */
49914 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49915 return true;
49917 /* Otherwise we have to go through the motions and see if we can
49918 figure out how to generate the requested permutation. */
49919 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49920 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49921 if (!d.one_operand_p)
49922 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49924 start_sequence ();
49925 ret = ix86_expand_vec_perm_const_1 (&d);
49926 end_sequence ();
49928 return ret;
49931 void
49932 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49934 struct expand_vec_perm_d d;
49935 unsigned i, nelt;
49937 d.target = targ;
49938 d.op0 = op0;
49939 d.op1 = op1;
49940 d.vmode = GET_MODE (targ);
49941 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49942 d.one_operand_p = false;
49943 d.testing_p = false;
49945 for (i = 0; i < nelt; ++i)
49946 d.perm[i] = i * 2 + odd;
49948 /* We'll either be able to implement the permutation directly... */
49949 if (expand_vec_perm_1 (&d))
49950 return;
49952 /* ... or we use the special-case patterns. */
49953 expand_vec_perm_even_odd_1 (&d, odd);
49956 static void
49957 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49959 struct expand_vec_perm_d d;
49960 unsigned i, nelt, base;
49961 bool ok;
49963 d.target = targ;
49964 d.op0 = op0;
49965 d.op1 = op1;
49966 d.vmode = GET_MODE (targ);
49967 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49968 d.one_operand_p = false;
49969 d.testing_p = false;
49971 base = high_p ? nelt / 2 : 0;
49972 for (i = 0; i < nelt / 2; ++i)
49974 d.perm[i * 2] = i + base;
49975 d.perm[i * 2 + 1] = i + base + nelt;
49978 /* Note that for AVX this isn't one instruction. */
49979 ok = ix86_expand_vec_perm_const_1 (&d);
49980 gcc_assert (ok);
49984 /* Expand a vector operation CODE for a V*QImode in terms of the
49985 same operation on V*HImode. */
49987 void
49988 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49990 machine_mode qimode = GET_MODE (dest);
49991 machine_mode himode;
49992 rtx (*gen_il) (rtx, rtx, rtx);
49993 rtx (*gen_ih) (rtx, rtx, rtx);
49994 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49995 struct expand_vec_perm_d d;
49996 bool ok, full_interleave;
49997 bool uns_p = false;
49998 int i;
50000 switch (qimode)
50002 case V16QImode:
50003 himode = V8HImode;
50004 gen_il = gen_vec_interleave_lowv16qi;
50005 gen_ih = gen_vec_interleave_highv16qi;
50006 break;
50007 case V32QImode:
50008 himode = V16HImode;
50009 gen_il = gen_avx2_interleave_lowv32qi;
50010 gen_ih = gen_avx2_interleave_highv32qi;
50011 break;
50012 case V64QImode:
50013 himode = V32HImode;
50014 gen_il = gen_avx512bw_interleave_lowv64qi;
50015 gen_ih = gen_avx512bw_interleave_highv64qi;
50016 break;
50017 default:
50018 gcc_unreachable ();
50021 op2_l = op2_h = op2;
50022 switch (code)
50024 case MULT:
50025 /* Unpack data such that we've got a source byte in each low byte of
50026 each word. We don't care what goes into the high byte of each word.
50027 Rather than trying to get zero in there, it is most convenient to let
50028 it be a copy of the low byte. */
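/* E.g. for V16QImode the low interleave of op2 with itself yields words
   whose low and high bytes are the same source byte; since only the low
   byte of each 16-bit product is kept, the copy in the high byte is
   harmless.  */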
50029 op2_l = gen_reg_rtx (qimode);
50030 op2_h = gen_reg_rtx (qimode);
50031 emit_insn (gen_il (op2_l, op2, op2));
50032 emit_insn (gen_ih (op2_h, op2, op2));
50033 /* FALLTHRU */
50035 op1_l = gen_reg_rtx (qimode);
50036 op1_h = gen_reg_rtx (qimode);
50037 emit_insn (gen_il (op1_l, op1, op1));
50038 emit_insn (gen_ih (op1_h, op1, op1));
50039 full_interleave = qimode == V16QImode;
50040 break;
50042 case ASHIFT:
50043 case LSHIFTRT:
50044 uns_p = true;
50045 /* FALLTHRU */
50046 case ASHIFTRT:
50047 op1_l = gen_reg_rtx (himode);
50048 op1_h = gen_reg_rtx (himode);
50049 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
50050 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
50051 full_interleave = true;
50052 break;
50053 default:
50054 gcc_unreachable ();
50057 /* Perform the operation. */
50058 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
50059 1, OPTAB_DIRECT);
50060 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
50061 1, OPTAB_DIRECT);
50062 gcc_assert (res_l && res_h);
50064 /* Merge the data back into the right place. */
50065 d.target = dest;
50066 d.op0 = gen_lowpart (qimode, res_l);
50067 d.op1 = gen_lowpart (qimode, res_h);
50068 d.vmode = qimode;
50069 d.nelt = GET_MODE_NUNITS (qimode);
50070 d.one_operand_p = false;
50071 d.testing_p = false;
50073 if (full_interleave)
50075 /* For SSE2, we used a full interleave, so the desired
50076 results are in the even elements. */
50077 for (i = 0; i < 64; ++i)
50078 d.perm[i] = i * 2;
50080 else
50082 /* For AVX, the interleave used above was not cross-lane. So the
50083 extraction is evens but with the second and third quarter swapped.
50084 Happily, that is even one insn shorter than even extraction. */
50085 for (i = 0; i < 64; ++i)
50086 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
50089 ok = ix86_expand_vec_perm_const_1 (&d);
50090 gcc_assert (ok);
50092 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50093 gen_rtx_fmt_ee (code, qimode, op1, op2));
50096 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
50097 if op is CONST_VECTOR with all odd elements equal to their
50098 preceding element. */
50100 static bool
50101 const_vector_equal_evenodd_p (rtx op)
50103 machine_mode mode = GET_MODE (op);
50104 int i, nunits = GET_MODE_NUNITS (mode);
50105 if (GET_CODE (op) != CONST_VECTOR
50106 || nunits != CONST_VECTOR_NUNITS (op))
50107 return false;
50108 for (i = 0; i < nunits; i += 2)
50109 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
50110 return false;
50111 return true;
50114 void
50115 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
50116 bool uns_p, bool odd_p)
50118 machine_mode mode = GET_MODE (op1);
50119 machine_mode wmode = GET_MODE (dest);
50120 rtx x;
50121 rtx orig_op1 = op1, orig_op2 = op2;
50123 if (!nonimmediate_operand (op1, mode))
50124 op1 = force_reg (mode, op1);
50125 if (!nonimmediate_operand (op2, mode))
50126 op2 = force_reg (mode, op2);
50128 /* We only play even/odd games with vectors of SImode. */
50129 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
50131 /* If we're looking for the odd results, shift those members down to
50132 the even slots. For some cpus this is faster than a PSHUFD. */
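/* E.g. for V4SImode a 32-bit logical right shift of the V2DImode view of
   each operand moves elements 1 and 3 into the even slots read by the
   even widening multiply.  */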
50133 if (odd_p)
50135 /* For XOP use vpmacsdqh, but only for smult, as it is only
50136 signed. */
50137 if (TARGET_XOP && mode == V4SImode && !uns_p)
50139 x = force_reg (wmode, CONST0_RTX (wmode));
50140 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
50141 return;
50144 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
50145 if (!const_vector_equal_evenodd_p (orig_op1))
50146 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
50147 x, NULL, 1, OPTAB_DIRECT);
50148 if (!const_vector_equal_evenodd_p (orig_op2))
50149 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
50150 x, NULL, 1, OPTAB_DIRECT);
50151 op1 = gen_lowpart (mode, op1);
50152 op2 = gen_lowpart (mode, op2);
50155 if (mode == V16SImode)
50157 if (uns_p)
50158 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
50159 else
50160 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
50162 else if (mode == V8SImode)
50164 if (uns_p)
50165 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
50166 else
50167 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
50169 else if (uns_p)
50170 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
50171 else if (TARGET_SSE4_1)
50172 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
50173 else
50175 rtx s1, s2, t0, t1, t2;
50177 /* The easiest way to implement this without PMULDQ is to go through
50178 the motions as if we are performing a full 64-bit multiply, except
50179 that we need to do less shuffling of the elements. */
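/* As an illustrative sketch of the identity used here: writing each
   signed 32-bit input as sign*2^32 + lo, with sign either 0 or -1,
   the 64-bit product modulo 2^64 is
   lo(a)*lo(b) + ((sign(a)*lo(b) + sign(b)*lo(a)) << 32),
   so only unsigned 32x32->64 multiplies are needed.  */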
50181 /* Compute the sign-extension, aka highparts, of the two operands. */
50182 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50183 op1, pc_rtx, pc_rtx);
50184 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50185 op2, pc_rtx, pc_rtx);
50187 /* Multiply LO(A) * HI(B), and vice-versa. */
50188 t1 = gen_reg_rtx (wmode);
50189 t2 = gen_reg_rtx (wmode);
50190 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
50191 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
50193 /* Multiply LO(A) * LO(B). */
50194 t0 = gen_reg_rtx (wmode);
50195 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
50197 /* Combine and shift the highparts into place. */
50198 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
50199 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
50200 1, OPTAB_DIRECT);
50202 /* Combine high and low parts. */
50203 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
50204 return;
50206 emit_insn (x);
50209 void
50210 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
50211 bool uns_p, bool high_p)
50213 machine_mode wmode = GET_MODE (dest);
50214 machine_mode mode = GET_MODE (op1);
50215 rtx t1, t2, t3, t4, mask;
50217 switch (mode)
50219 case V4SImode:
50220 t1 = gen_reg_rtx (mode);
50221 t2 = gen_reg_rtx (mode);
50222 if (TARGET_XOP && !uns_p)
50224 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
50225 shuffle the elements once so that all elements are in the right
50226 place for immediate use: { A C B D }. */
50227 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
50228 const1_rtx, GEN_INT (3)));
50229 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
50230 const1_rtx, GEN_INT (3)));
50232 else
50234 /* Put the elements into place for the multiply. */
50235 ix86_expand_vec_interleave (t1, op1, op1, high_p);
50236 ix86_expand_vec_interleave (t2, op2, op2, high_p);
50237 high_p = false;
50239 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
50240 break;
50242 case V8SImode:
50243 /* Shuffle the elements between the lanes. After this we
50244 have { A B E F | C D G H } for each operand. */
50245 t1 = gen_reg_rtx (V4DImode);
50246 t2 = gen_reg_rtx (V4DImode);
50247 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
50248 const0_rtx, const2_rtx,
50249 const1_rtx, GEN_INT (3)));
50250 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
50251 const0_rtx, const2_rtx,
50252 const1_rtx, GEN_INT (3)));
50254 /* Shuffle the elements within the lanes. After this we
50255 have { A A B B | C C D D } or { E E F F | G G H H }. */
50256 t3 = gen_reg_rtx (V8SImode);
50257 t4 = gen_reg_rtx (V8SImode);
50258 mask = GEN_INT (high_p
50259 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
50260 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
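/* Illustrative values: the resulting pshufd immediate is 0xfa
   (select elements 2,2,3,3) when HIGH_P and 0x50 (elements
   0,0,1,1) otherwise.  */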
50261 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
50262 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
50264 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
50265 break;
50267 case V8HImode:
50268 case V16HImode:
50269 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
50270 uns_p, OPTAB_DIRECT);
50271 t2 = expand_binop (mode,
50272 uns_p ? umul_highpart_optab : smul_highpart_optab,
50273 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
50274 gcc_assert (t1 && t2);
50276 t3 = gen_reg_rtx (mode);
50277 ix86_expand_vec_interleave (t3, t1, t2, high_p);
50278 emit_move_insn (dest, gen_lowpart (wmode, t3));
50279 break;
50281 case V16QImode:
50282 case V32QImode:
50283 case V32HImode:
50284 case V16SImode:
50285 case V64QImode:
50286 t1 = gen_reg_rtx (wmode);
50287 t2 = gen_reg_rtx (wmode);
50288 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
50289 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
50291 emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2)));
50292 break;
50294 default:
50295 gcc_unreachable ();
50299 void
50300 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
50302 rtx res_1, res_2, res_3, res_4;
50304 res_1 = gen_reg_rtx (V4SImode);
50305 res_2 = gen_reg_rtx (V4SImode);
50306 res_3 = gen_reg_rtx (V2DImode);
50307 res_4 = gen_reg_rtx (V2DImode);
50308 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
50309 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
50311 /* Move the results in element 2 down to element 1; we don't care
50312 what goes in elements 2 and 3. Then we can merge the parts
50313 back together with an interleave.
50315 Note that two other sequences were tried:
50316 (1) Use interleaves at the start instead of psrldq, which allows
50317 us to use a single shufps to merge things back at the end.
50318 (2) Use shufps here to combine the two vectors, then pshufd to
50319 put the elements in the correct order.
50320 In both cases the cost of the reformatting stall was too high
50321 and the overall sequence slower. */
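/* Illustration: for op1 = { a0 a1 a2 a3 } and op2 = { b0 b1 b2 b3 },
   res_3 = { a0*b0, a2*b2 } and res_4 = { a1*b1, a3*b3 } as V2DImode.
   The pshufds below move the low 32 bits of each product into
   elements 0 and 1, and the final interleave produces
   { lo(a0*b0), lo(a1*b1), lo(a2*b2), lo(a3*b3) }.  */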
50323 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
50324 const0_rtx, const2_rtx,
50325 const0_rtx, const0_rtx));
50326 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
50327 const0_rtx, const2_rtx,
50328 const0_rtx, const0_rtx));
50329 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
50331 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
50334 void
50335 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
50337 machine_mode mode = GET_MODE (op0);
50338 rtx t1, t2, t3, t4, t5, t6;
50340 if (TARGET_AVX512DQ && mode == V8DImode)
50341 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
50342 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
50343 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
50344 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
50345 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
50346 else if (TARGET_XOP && mode == V2DImode)
50348 /* op1: A,B,C,D, op2: E,F,G,H */
50349 op1 = gen_lowpart (V4SImode, op1);
50350 op2 = gen_lowpart (V4SImode, op2);
50352 t1 = gen_reg_rtx (V4SImode);
50353 t2 = gen_reg_rtx (V4SImode);
50354 t3 = gen_reg_rtx (V2DImode);
50355 t4 = gen_reg_rtx (V2DImode);
50357 /* t1: B,A,D,C */
50358 emit_insn (gen_sse2_pshufd_1 (t1, op1,
50359 GEN_INT (1),
50360 GEN_INT (0),
50361 GEN_INT (3),
50362 GEN_INT (2)));
50364 /* t2: (B*E),(A*F),(D*G),(C*H) */
50365 emit_insn (gen_mulv4si3 (t2, t1, op2));
50367 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50368 emit_insn (gen_xop_phadddq (t3, t2));
50370 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50371 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50373 /* Multiply lower parts and add all. */
50374 t5 = gen_reg_rtx (V2DImode);
50375 emit_insn (gen_vec_widen_umult_even_v4si (t5,
50376 gen_lowpart (V4SImode, op1),
50377 gen_lowpart (V4SImode, op2)));
50378 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
50381 else
50383 machine_mode nmode;
50384 rtx (*umul) (rtx, rtx, rtx);
50386 if (mode == V2DImode)
50388 umul = gen_vec_widen_umult_even_v4si;
50389 nmode = V4SImode;
50391 else if (mode == V4DImode)
50393 umul = gen_vec_widen_umult_even_v8si;
50394 nmode = V8SImode;
50396 else if (mode == V8DImode)
50398 umul = gen_vec_widen_umult_even_v16si;
50399 nmode = V16SImode;
50401 else
50402 gcc_unreachable ();
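/* Sketch of the decomposition used below: with each 64-bit element
   written as hi*2^32 + lo, the product modulo 2^64 is
   lo(a)*lo(b) + ((hi(a)*lo(b) + hi(b)*lo(a)) << 32).
   The even widening multiply on the SImode view picks up the low
   halves, and the logical shifts by 32 expose the high halves.  */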
50405 /* Multiply low parts. */
50406 t1 = gen_reg_rtx (mode);
50407 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50409 /* Shift input vectors right 32 bits so we can multiply high parts. */
50410 t6 = GEN_INT (32);
50411 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50412 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50414 /* Multiply high parts by low parts. */
50415 t4 = gen_reg_rtx (mode);
50416 t5 = gen_reg_rtx (mode);
50417 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50418 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50420 /* Combine and shift the highparts back. */
50421 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50422 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50424 /* Combine high and low parts. */
50425 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50428 set_unique_reg_note (get_last_insn (), REG_EQUAL,
50429 gen_rtx_MULT (mode, op1, op2));
50432 /* Return 1 if control transfer instruction INSN
50433 should be encoded with bnd prefix.
50434 If insn is NULL then return 1 when control
50435 transfer instructions should be prefixed with
50436 bnd by default for current function. */
50438 bool
50439 ix86_bnd_prefixed_insn_p (rtx insn)
50441 /* For call insns check special flag. */
50442 if (insn && CALL_P (insn))
50444 rtx call = get_call_rtx_from (insn);
50445 if (call)
50446 return CALL_EXPR_WITH_BOUNDS_P (call);
50449 /* All other insns are prefixed only if function is instrumented. */
50450 return chkp_function_instrumented_p (current_function_decl);
50453 /* Calculate integer abs() using only SSE2 instructions. */
50455 void
50456 ix86_expand_sse2_abs (rtx target, rtx input)
50458 machine_mode mode = GET_MODE (target);
50459 rtx tmp0, tmp1, x;
50461 switch (mode)
50463 /* For 32-bit signed integer X, the best way to calculate the absolute
50464 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
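/* Illustrative check: for X = -5, (signed) X >> 31 is -1, so
   (X ^ -1) - (-1) = ~X + 1 = 5; for X >= 0 the shift yields 0 and
   the expression reduces to X.  */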
50465 case V4SImode:
50466 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50467 GEN_INT (GET_MODE_BITSIZE
50468 (GET_MODE_INNER (mode)) - 1),
50469 NULL, 0, OPTAB_DIRECT);
50470 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50471 NULL, 0, OPTAB_DIRECT);
50472 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50473 target, 0, OPTAB_DIRECT);
50474 break;
50476 /* For 16-bit signed integer X, the best way to calculate the absolute
50477 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
50478 case V8HImode:
50479 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50481 x = expand_simple_binop (mode, SMAX, tmp0, input,
50482 target, 0, OPTAB_DIRECT);
50483 break;
50485 /* For 8-bit signed integer X, the best way to calculate the absolute
50486 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50487 as SSE2 provides the PMINUB insn. */
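/* Illustrative check: for X = -3, (unsigned char) X is 253 and
   (unsigned char) -X is 3, so the unsigned minimum is 3 = |X|.  */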
50488 case V16QImode:
50489 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50491 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50492 target, 0, OPTAB_DIRECT);
50493 break;
50495 default:
50496 gcc_unreachable ();
50499 if (x != target)
50500 emit_move_insn (target, x);
50503 /* Expand an extract from a vector register through pextr insn.
50504 Return true if successful. */
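/* Operand layout used below: operands[0] is the scalar destination,
   operands[1] the vector source, operands[2] the extraction size in
   bits and operands[3] the bit position of the extracted field.  */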
50506 bool
50507 ix86_expand_pextr (rtx *operands)
50509 rtx dst = operands[0];
50510 rtx src = operands[1];
50512 unsigned int size = INTVAL (operands[2]);
50513 unsigned int pos = INTVAL (operands[3]);
50515 if (GET_CODE (dst) == SUBREG)
50517 /* Reject non-lowpart subregs. */
50518 if (SUBREG_BYTE (dst) > 0)
50519 return false;
50520 dst = SUBREG_REG (dst);
50523 if (GET_CODE (src) == SUBREG)
50525 pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
50526 src = SUBREG_REG (src);
50529 switch (GET_MODE (src))
50531 case V16QImode:
50532 case V8HImode:
50533 case V4SImode:
50534 case V2DImode:
50535 case V1TImode:
50536 case TImode:
50538 machine_mode srcmode, dstmode;
50539 rtx d, pat;
50541 dstmode = mode_for_size (size, MODE_INT, 0);
50543 switch (dstmode)
50545 case QImode:
50546 if (!TARGET_SSE4_1)
50547 return false;
50548 srcmode = V16QImode;
50549 break;
50551 case HImode:
50552 if (!TARGET_SSE2)
50553 return false;
50554 srcmode = V8HImode;
50555 break;
50557 case SImode:
50558 if (!TARGET_SSE4_1)
50559 return false;
50560 srcmode = V4SImode;
50561 break;
50563 case DImode:
50564 gcc_assert (TARGET_64BIT);
50565 if (!TARGET_SSE4_1)
50566 return false;
50567 srcmode = V2DImode;
50568 break;
50570 default:
50571 return false;
50574 /* Reject extractions from misaligned positions. */
50575 if (pos & (size-1))
50576 return false;
50578 if (GET_MODE (dst) == dstmode)
50579 d = dst;
50580 else
50581 d = gen_reg_rtx (dstmode);
50583 /* Construct insn pattern. */
50584 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size)));
50585 pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat);
50587 /* Let the rtl optimizers know about the zero extension performed. */
50588 if (dstmode == QImode || dstmode == HImode)
50590 pat = gen_rtx_ZERO_EXTEND (SImode, pat);
50591 d = gen_lowpart (SImode, d);
50594 emit_insn (gen_rtx_SET (d, pat));
50596 if (d != dst)
50597 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50598 return true;
50601 default:
50602 return false;
50606 /* Expand an insert into a vector register through pinsr insn.
50607 Return true if successful. */
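/* Operand layout used below: operands[0] is the vector destination,
   operands[3] the value to insert, operands[1] the element size in
   bits and operands[2] the bit position of the inserted field.  */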
50609 bool
50610 ix86_expand_pinsr (rtx *operands)
50612 rtx dst = operands[0];
50613 rtx src = operands[3];
50615 unsigned int size = INTVAL (operands[1]);
50616 unsigned int pos = INTVAL (operands[2]);
50618 if (GET_CODE (dst) == SUBREG)
50620 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50621 dst = SUBREG_REG (dst);
50624 switch (GET_MODE (dst))
50626 case V16QImode:
50627 case V8HImode:
50628 case V4SImode:
50629 case V2DImode:
50630 case V1TImode:
50631 case TImode:
50633 machine_mode srcmode, dstmode;
50634 rtx (*pinsr)(rtx, rtx, rtx, rtx);
50635 rtx d;
50637 srcmode = mode_for_size (size, MODE_INT, 0);
50639 switch (srcmode)
50641 case QImode:
50642 if (!TARGET_SSE4_1)
50643 return false;
50644 dstmode = V16QImode;
50645 pinsr = gen_sse4_1_pinsrb;
50646 break;
50648 case HImode:
50649 if (!TARGET_SSE2)
50650 return false;
50651 dstmode = V8HImode;
50652 pinsr = gen_sse2_pinsrw;
50653 break;
50655 case SImode:
50656 if (!TARGET_SSE4_1)
50657 return false;
50658 dstmode = V4SImode;
50659 pinsr = gen_sse4_1_pinsrd;
50660 break;
50662 case DImode:
50663 gcc_assert (TARGET_64BIT);
50664 if (!TARGET_SSE4_1)
50665 return false;
50666 dstmode = V2DImode;
50667 pinsr = gen_sse4_1_pinsrq;
50668 break;
50670 default:
50671 return false;
50674 /* Reject insertions to misaligned positions. */
50675 if (pos & (size-1))
50676 return false;
50678 if (GET_CODE (src) == SUBREG)
50680 unsigned int srcpos = SUBREG_BYTE (src);
50682 if (srcpos > 0)
50684 rtx extr_ops[4];
50686 extr_ops[0] = gen_reg_rtx (srcmode);
50687 extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src));
50688 extr_ops[2] = GEN_INT (size);
50689 extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT);
50691 if (!ix86_expand_pextr (extr_ops))
50692 return false;
50694 src = extr_ops[0];
50696 else
50697 src = gen_lowpart (srcmode, SUBREG_REG (src));
50700 if (GET_MODE (dst) == dstmode)
50701 d = dst;
50702 else
50703 d = gen_reg_rtx (dstmode);
50705 emit_insn (pinsr (d, gen_lowpart (dstmode, dst),
50706 gen_lowpart (srcmode, src),
50707 GEN_INT (1 << (pos / size))));
50708 if (d != dst)
50709 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50710 return true;
50713 default:
50714 return false;
50718 /* This function returns the calling-ABI-specific va_list type node.
50719 It returns the va_list type specific to FNDECL. */
50721 static tree
50722 ix86_fn_abi_va_list (tree fndecl)
50724 if (!TARGET_64BIT)
50725 return va_list_type_node;
50726 gcc_assert (fndecl != NULL_TREE);
50728 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50729 return ms_va_list_type_node;
50730 else
50731 return sysv_va_list_type_node;
50734 /* Returns the canonical va_list type specified by TYPE. If there
50735 is no valid TYPE provided, it returns NULL_TREE. */
50737 static tree
50738 ix86_canonical_va_list_type (tree type)
50740 tree wtype, htype;
50742 /* Resolve references and pointers to va_list type. */
50743 if (TREE_CODE (type) == MEM_REF)
50744 type = TREE_TYPE (type);
50745 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50746 type = TREE_TYPE (type);
50747 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50748 type = TREE_TYPE (type);
50750 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50752 wtype = va_list_type_node;
50753 gcc_assert (wtype != NULL_TREE);
50754 htype = type;
50755 if (TREE_CODE (wtype) == ARRAY_TYPE)
50757 /* If va_list is an array type, the argument may have decayed
50758 to a pointer type, e.g. by being passed to another function.
50759 In that case, unwrap both types so that we can compare the
50760 underlying records. */
50761 if (TREE_CODE (htype) == ARRAY_TYPE
50762 || POINTER_TYPE_P (htype))
50764 wtype = TREE_TYPE (wtype);
50765 htype = TREE_TYPE (htype);
50768 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50769 return va_list_type_node;
50770 wtype = sysv_va_list_type_node;
50771 gcc_assert (wtype != NULL_TREE);
50772 htype = type;
50773 if (TREE_CODE (wtype) == ARRAY_TYPE)
50775 /* If va_list is an array type, the argument may have decayed
50776 to a pointer type, e.g. by being passed to another function.
50777 In that case, unwrap both types so that we can compare the
50778 underlying records. */
50779 if (TREE_CODE (htype) == ARRAY_TYPE
50780 || POINTER_TYPE_P (htype))
50782 wtype = TREE_TYPE (wtype);
50783 htype = TREE_TYPE (htype);
50786 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50787 return sysv_va_list_type_node;
50788 wtype = ms_va_list_type_node;
50789 gcc_assert (wtype != NULL_TREE);
50790 htype = type;
50791 if (TREE_CODE (wtype) == ARRAY_TYPE)
50793 /* If va_list is an array type, the argument may have decayed
50794 to a pointer type, e.g. by being passed to another function.
50795 In that case, unwrap both types so that we can compare the
50796 underlying records. */
50797 if (TREE_CODE (htype) == ARRAY_TYPE
50798 || POINTER_TYPE_P (htype))
50800 wtype = TREE_TYPE (wtype);
50801 htype = TREE_TYPE (htype);
50804 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50805 return ms_va_list_type_node;
50806 return NULL_TREE;
50808 return std_canonical_va_list_type (type);
50811 /* Iterate through the target-specific builtin types for va_list.
50812 IDX denotes the iterator, *PTREE is set to the result type of
50813 the va_list builtin, and *PNAME to its internal name.
50814 Returns zero if there is no element for this index, otherwise
50815 IDX should be increased upon the next call.
50816 Note, do not iterate a base builtin's name like __builtin_va_list.
50817 Used from c_common_nodes_and_builtins. */
50819 static int
50820 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50822 if (TARGET_64BIT)
50824 switch (idx)
50826 default:
50827 break;
50829 case 0:
50830 *ptree = ms_va_list_type_node;
50831 *pname = "__builtin_ms_va_list";
50832 return 1;
50834 case 1:
50835 *ptree = sysv_va_list_type_node;
50836 *pname = "__builtin_sysv_va_list";
50837 return 1;
50841 return 0;
50844 #undef TARGET_SCHED_DISPATCH
50845 #define TARGET_SCHED_DISPATCH has_dispatch
50846 #undef TARGET_SCHED_DISPATCH_DO
50847 #define TARGET_SCHED_DISPATCH_DO do_dispatch
50848 #undef TARGET_SCHED_REASSOCIATION_WIDTH
50849 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50850 #undef TARGET_SCHED_REORDER
50851 #define TARGET_SCHED_REORDER ix86_sched_reorder
50852 #undef TARGET_SCHED_ADJUST_PRIORITY
50853 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50854 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50855 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50856 ix86_dependencies_evaluation_hook
50858 /* The size of the dispatch window is the total number of bytes of
50859 object code allowed in a window. */
50860 #define DISPATCH_WINDOW_SIZE 16
50862 /* Number of dispatch windows considered for scheduling. */
50863 #define MAX_DISPATCH_WINDOWS 3
50865 /* Maximum number of instructions in a window. */
50866 #define MAX_INSN 4
50868 /* Maximum number of immediate operands in a window. */
50869 #define MAX_IMM 4
50871 /* Maximum number of immediate bits allowed in a window. */
50872 #define MAX_IMM_SIZE 128
50874 /* Maximum number of 32 bit immediates allowed in a window. */
50875 #define MAX_IMM_32 4
50877 /* Maximum number of 64 bit immediates allowed in a window. */
50878 #define MAX_IMM_64 2
50880 /* Maximum total of loads or prefetches allowed in a window. */
50881 #define MAX_LOAD 2
50883 /* Maximum total of stores allowed in a window. */
50884 #define MAX_STORE 1
50886 #undef BIG
50887 #define BIG 100
50890 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
50891 enum dispatch_group {
50892 disp_no_group = 0,
50893 disp_load,
50894 disp_store,
50895 disp_load_store,
50896 disp_prefetch,
50897 disp_imm,
50898 disp_imm_32,
50899 disp_imm_64,
50900 disp_branch,
50901 disp_cmp,
50902 disp_jcc,
50903 disp_last
50906 /* Number of allowable groups in a dispatch window. It is an array
50907 indexed by dispatch_group enum. 100 is used as a big number,
50908 because the number of these kinds of operations does not have any
50909 effect in a dispatch window, but we need them for other reasons in
50910 the table. */
50911 static unsigned int num_allowable_groups[disp_last] = {
50912 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50915 char group_name[disp_last + 1][16] = {
50916 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50917 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50918 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50921 /* Instruction path. */
50922 enum insn_path {
50923 no_path = 0,
50924 path_single, /* Single micro op. */
50925 path_double, /* Double micro op. */
50926 path_multi, /* Instructions with more than 2 micro ops. */
50927 last_path
50930 /* sched_insn_info defines a window to the instructions scheduled in
50931 the basic block. It contains a pointer to the insn_info table and
50932 the instruction scheduled.
50934 Windows are allocated for each basic block and are linked
50935 together. */
50936 typedef struct sched_insn_info_s {
50937 rtx insn;
50938 enum dispatch_group group;
50939 enum insn_path path;
50940 int byte_len;
50941 int imm_bytes;
50942 } sched_insn_info;
50944 /* Linked list of dispatch windows. This is a two-way list of
50945 dispatch windows of a basic block. It contains information about
50946 the number of uops in the window and the total number of
50947 instructions and of bytes in the object code for this dispatch
50948 window. */
50949 typedef struct dispatch_windows_s {
50950 int num_insn; /* Number of insns in the window. */
50951 int num_uops; /* Number of uops in the window. */
50952 int window_size; /* Number of bytes in the window. */
50953 int window_num; /* Window number, either 0 or 1. */
50954 int num_imm; /* Number of immediates in an insn. */
50955 int num_imm_32; /* Number of 32 bit immediates in an insn. */
50956 int num_imm_64; /* Number of 64 bit immediates in an insn. */
50957 int imm_size; /* Total immediates in the window. */
50958 int num_loads; /* Total memory loads in the window. */
50959 int num_stores; /* Total memory stores in the window. */
50960 int violation; /* Violation exists in window. */
50961 sched_insn_info *window; /* Pointer to the window. */
50962 struct dispatch_windows_s *next;
50963 struct dispatch_windows_s *prev;
50964 } dispatch_windows;
50966 /* Immediate values used in an insn. */
50967 typedef struct imm_info_s
50969 int imm;
50970 int imm32;
50971 int imm64;
50972 } imm_info;
50974 static dispatch_windows *dispatch_window_list;
50975 static dispatch_windows *dispatch_window_list1;
50977 /* Get dispatch group of insn. */
50979 static enum dispatch_group
50980 get_mem_group (rtx_insn *insn)
50982 enum attr_memory memory;
50984 if (INSN_CODE (insn) < 0)
50985 return disp_no_group;
50986 memory = get_attr_memory (insn);
50987 if (memory == MEMORY_STORE)
50988 return disp_store;
50990 if (memory == MEMORY_LOAD)
50991 return disp_load;
50993 if (memory == MEMORY_BOTH)
50994 return disp_load_store;
50996 return disp_no_group;
50999 /* Return true if insn is a compare instruction. */
51001 static bool
51002 is_cmp (rtx_insn *insn)
51004 enum attr_type type;
51006 type = get_attr_type (insn);
51007 return (type == TYPE_TEST
51008 || type == TYPE_ICMP
51009 || type == TYPE_FCMP
51010 || GET_CODE (PATTERN (insn)) == COMPARE);
51013 /* Return true if a dispatch violation was encountered. */
51015 static bool
51016 dispatch_violation (void)
51018 if (dispatch_window_list->next)
51019 return dispatch_window_list->next->violation;
51020 return dispatch_window_list->violation;
51023 /* Return true if insn is a branch instruction. */
51025 static bool
51026 is_branch (rtx_insn *insn)
51028 return (CALL_P (insn) || JUMP_P (insn));
51031 /* Return true if insn is a prefetch instruction. */
51033 static bool
51034 is_prefetch (rtx_insn *insn)
51036 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
51039 /* This function initializes a dispatch window and the list container holding a
51040 pointer to the window. */
51042 static void
51043 init_window (int window_num)
51045 int i;
51046 dispatch_windows *new_list;
51048 if (window_num == 0)
51049 new_list = dispatch_window_list;
51050 else
51051 new_list = dispatch_window_list1;
51053 new_list->num_insn = 0;
51054 new_list->num_uops = 0;
51055 new_list->window_size = 0;
51056 new_list->next = NULL;
51057 new_list->prev = NULL;
51058 new_list->window_num = window_num;
51059 new_list->num_imm = 0;
51060 new_list->num_imm_32 = 0;
51061 new_list->num_imm_64 = 0;
51062 new_list->imm_size = 0;
51063 new_list->num_loads = 0;
51064 new_list->num_stores = 0;
51065 new_list->violation = false;
51067 for (i = 0; i < MAX_INSN; i++)
51069 new_list->window[i].insn = NULL;
51070 new_list->window[i].group = disp_no_group;
51071 new_list->window[i].path = no_path;
51072 new_list->window[i].byte_len = 0;
51073 new_list->window[i].imm_bytes = 0;
51075 return;
51078 /* This function allocates and initializes a dispatch window and the
51079 list container holding a pointer to the window. */
51081 static dispatch_windows *
51082 allocate_window (void)
51084 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
51085 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
51087 return new_list;
51090 /* This routine initializes the dispatch scheduling information. It
51091 initiates building dispatch scheduler tables and constructs the
51092 first dispatch window. */
51094 static void
51095 init_dispatch_sched (void)
51097 /* Allocate a dispatch list and a window. */
51098 dispatch_window_list = allocate_window ();
51099 dispatch_window_list1 = allocate_window ();
51100 init_window (0);
51101 init_window (1);
51104 /* This function returns true if a branch is detected. End of a basic block
51105 does not have to be a branch, but here we assume only branches end a
51106 window. */
51108 static bool
51109 is_end_basic_block (enum dispatch_group group)
51111 return group == disp_branch;
51114 /* This function is called when the end of a window processing is reached. */
51116 static void
51117 process_end_window (void)
51119 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
51120 if (dispatch_window_list->next)
51122 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
51123 gcc_assert (dispatch_window_list->window_size
51124 + dispatch_window_list1->window_size <= 48);
51125 init_window (1);
51127 init_window (0);
51130 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
51131 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
51132 for 48 bytes of instructions. Note that these windows are not dispatch
51133 windows whose sizes are DISPATCH_WINDOW_SIZE. */
51135 static dispatch_windows *
51136 allocate_next_window (int window_num)
51138 if (window_num == 0)
51140 if (dispatch_window_list->next)
51141 init_window (1);
51142 init_window (0);
51143 return dispatch_window_list;
51146 dispatch_window_list->next = dispatch_window_list1;
51147 dispatch_window_list1->prev = dispatch_window_list;
51149 return dispatch_window_list1;
51152 /* Compute number of immediate operands of an instruction. */
51154 static void
51155 find_constant (rtx in_rtx, imm_info *imm_values)
51157 if (INSN_P (in_rtx))
51158 in_rtx = PATTERN (in_rtx);
51159 subrtx_iterator::array_type array;
51160 FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
51161 if (const_rtx x = *iter)
51162 switch (GET_CODE (x))
51164 case CONST:
51165 case SYMBOL_REF:
51166 case CONST_INT:
51167 (imm_values->imm)++;
51168 if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
51169 (imm_values->imm32)++;
51170 else
51171 (imm_values->imm64)++;
51172 break;
51174 case CONST_DOUBLE:
51175 case CONST_WIDE_INT:
51176 (imm_values->imm)++;
51177 (imm_values->imm64)++;
51178 break;
51180 case CODE_LABEL:
51181 if (LABEL_KIND (x) == LABEL_NORMAL)
51183 (imm_values->imm)++;
51184 (imm_values->imm32)++;
51186 break;
51188 default:
51189 break;
51193 /* Return total size of immediate operands of an instruction along with number
51194 of corresponding immediate operands. It initializes its parameters to zero
51195 before calling FIND_CONSTANT.
51196 INSN is the input instruction. IMM is the total of immediates.
51197 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
51198 bit immediates. */
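/* Illustrative example: an insn whose pattern contains the single
   CONST_INT 10 (say "addl $10, %eax") yields *IMM = 1, *IMM32 = 1,
   *IMM64 = 0 and a returned size of 4 bytes.  */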
51200 static int
51201 get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
51203 imm_info imm_values = {0, 0, 0};
51205 find_constant (insn, &imm_values);
51206 *imm = imm_values.imm;
51207 *imm32 = imm_values.imm32;
51208 *imm64 = imm_values.imm64;
51209 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
51212 /* This function indicates whether an instruction has any immediate
51213 operands. */
51215 static bool
51216 has_immediate (rtx_insn *insn)
51218 int num_imm_operand;
51219 int num_imm32_operand;
51220 int num_imm64_operand;
51222 if (insn)
51223 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51224 &num_imm64_operand);
51225 return false;
51228 /* Return single or double path for instructions. */
51230 static enum insn_path
51231 get_insn_path (rtx_insn *insn)
51233 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
51235 if ((int)path == 0)
51236 return path_single;
51238 if ((int)path == 1)
51239 return path_double;
51241 return path_multi;
51244 /* Return insn dispatch group. */
51246 static enum dispatch_group
51247 get_insn_group (rtx_insn *insn)
51249 enum dispatch_group group = get_mem_group (insn);
51250 if (group)
51251 return group;
51253 if (is_branch (insn))
51254 return disp_branch;
51256 if (is_cmp (insn))
51257 return disp_cmp;
51259 if (has_immediate (insn))
51260 return disp_imm;
51262 if (is_prefetch (insn))
51263 return disp_prefetch;
51265 return disp_no_group;
51268 /* Count number of GROUP restricted instructions in a dispatch
51269 window WINDOW_LIST. */
51271 static int
51272 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
51274 enum dispatch_group group = get_insn_group (insn);
51275 int imm_size;
51276 int num_imm_operand;
51277 int num_imm32_operand;
51278 int num_imm64_operand;
51280 if (group == disp_no_group)
51281 return 0;
51283 if (group == disp_imm)
51285 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51286 &num_imm64_operand);
51287 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
51288 || num_imm_operand + window_list->num_imm > MAX_IMM
51289 || (num_imm32_operand > 0
51290 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
51291 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
51292 || (num_imm64_operand > 0
51293 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
51294 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
51295 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
51296 && num_imm64_operand > 0
51297 && ((window_list->num_imm_64 > 0
51298 && window_list->num_insn >= 2)
51299 || window_list->num_insn >= 3)))
51300 return BIG;
51302 return 1;
51305 if ((group == disp_load_store
51306 && (window_list->num_loads >= MAX_LOAD
51307 || window_list->num_stores >= MAX_STORE))
51308 || ((group == disp_load
51309 || group == disp_prefetch)
51310 && window_list->num_loads >= MAX_LOAD)
51311 || (group == disp_store
51312 && window_list->num_stores >= MAX_STORE))
51313 return BIG;
51315 return 1;
51318 /* This function returns true if insn satisfies dispatch rules on the
51319 last window scheduled. */
51321 static bool
51322 fits_dispatch_window (rtx_insn *insn)
51324 dispatch_windows *window_list = dispatch_window_list;
51325 dispatch_windows *window_list_next = dispatch_window_list->next;
51326 unsigned int num_restrict;
51327 enum dispatch_group group = get_insn_group (insn);
51328 enum insn_path path = get_insn_path (insn);
51329 int sum;
51331 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
51332 instructions should be given the lowest priority in the
51333 scheduling process in the Haifa scheduler to make sure they will be
51334 scheduled in the same dispatch window as the instructions that use them. */
51335 if (group == disp_jcc || group == disp_cmp)
51336 return false;
51338 /* Check nonrestricted. */
51339 if (group == disp_no_group || group == disp_branch)
51340 return true;
51342 /* Get last dispatch window. */
51343 if (window_list_next)
51344 window_list = window_list_next;
51346 if (window_list->window_num == 1)
51348 sum = window_list->prev->window_size + window_list->window_size;
51350 if (sum == 32
51351 || (min_insn_size (insn) + sum) >= 48)
51352 /* Window 1 is full. Go for next window. */
51353 return true;
51356 num_restrict = count_num_restricted (insn, window_list);
51358 if (num_restrict > num_allowable_groups[group])
51359 return false;
51361 /* See if it fits in the first window. */
51362 if (window_list->window_num == 0)
51364 /* The first window should have only single and double path
51365 uops. */
51366 if (path == path_double
51367 && (window_list->num_uops + 2) > MAX_INSN)
51368 return false;
51369 else if (path != path_single)
51370 return false;
51372 return true;
51375 /* Add an instruction INSN with NUM_UOPS micro-operations to the
51376 dispatch window WINDOW_LIST. */
51378 static void
51379 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
51381 int byte_len = min_insn_size (insn);
51382 int num_insn = window_list->num_insn;
51383 int imm_size;
51384 sched_insn_info *window = window_list->window;
51385 enum dispatch_group group = get_insn_group (insn);
51386 enum insn_path path = get_insn_path (insn);
51387 int num_imm_operand;
51388 int num_imm32_operand;
51389 int num_imm64_operand;
51391 if (!window_list->violation && group != disp_cmp
51392 && !fits_dispatch_window (insn))
51393 window_list->violation = true;
51395 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51396 &num_imm64_operand);
51398 /* Initialize window with new instruction. */
51399 window[num_insn].insn = insn;
51400 window[num_insn].byte_len = byte_len;
51401 window[num_insn].group = group;
51402 window[num_insn].path = path;
51403 window[num_insn].imm_bytes = imm_size;
51405 window_list->window_size += byte_len;
51406 window_list->num_insn = num_insn + 1;
51407 window_list->num_uops = window_list->num_uops + num_uops;
51408 window_list->imm_size += imm_size;
51409 window_list->num_imm += num_imm_operand;
51410 window_list->num_imm_32 += num_imm32_operand;
51411 window_list->num_imm_64 += num_imm64_operand;
51413 if (group == disp_store)
51414 window_list->num_stores += 1;
51415 else if (group == disp_load
51416 || group == disp_prefetch)
51417 window_list->num_loads += 1;
51418 else if (group == disp_load_store)
51420 window_list->num_stores += 1;
51421 window_list->num_loads += 1;
51425 /* Adds a scheduled instruction, INSN, to the current dispatch window.
51426 If the total bytes of instructions or the number of instructions in
51427 the window exceed the allowed limits, it allocates a new window. */
51429 static void
51430 add_to_dispatch_window (rtx_insn *insn)
51432 int byte_len;
51433 dispatch_windows *window_list;
51434 dispatch_windows *next_list;
51435 dispatch_windows *window0_list;
51436 enum insn_path path;
51437 enum dispatch_group insn_group;
51438 bool insn_fits;
51439 int num_insn;
51440 int num_uops;
51441 int window_num;
51442 int insn_num_uops;
51443 int sum;
51445 if (INSN_CODE (insn) < 0)
51446 return;
51448 byte_len = min_insn_size (insn);
51449 window_list = dispatch_window_list;
51450 next_list = window_list->next;
51451 path = get_insn_path (insn);
51452 insn_group = get_insn_group (insn);
51454 /* Get the last dispatch window. */
51455 if (next_list)
51456 window_list = dispatch_window_list->next;
51458 if (path == path_single)
51459 insn_num_uops = 1;
51460 else if (path == path_double)
51461 insn_num_uops = 2;
51462 else
51463 insn_num_uops = (int) path;
51465 /* If the current window is full, get a new window.
51466 Window number zero is full if MAX_INSN uops are scheduled in it.
51467 Window number one is full if window zero's bytes plus window
51468 one's bytes reach 32, or if the bytes of the new instruction added
51469 to the total make it greater than 48, or if it already has MAX_INSN
51470 instructions in it. */
51471 num_insn = window_list->num_insn;
51472 num_uops = window_list->num_uops;
51473 window_num = window_list->window_num;
51474 insn_fits = fits_dispatch_window (insn);
51476 if (num_insn >= MAX_INSN
51477 || num_uops + insn_num_uops > MAX_INSN
51478 || !(insn_fits))
51480 window_num = ~window_num & 1;
51481 window_list = allocate_next_window (window_num);
51484 if (window_num == 0)
51486 add_insn_window (insn, window_list, insn_num_uops);
51487 if (window_list->num_insn >= MAX_INSN
51488 && insn_group == disp_branch)
51490 process_end_window ();
51491 return;
51494 else if (window_num == 1)
51496 window0_list = window_list->prev;
51497 sum = window0_list->window_size + window_list->window_size;
51498 if (sum == 32
51499 || (byte_len + sum) >= 48)
51501 process_end_window ();
51502 window_list = dispatch_window_list;
51505 add_insn_window (insn, window_list, insn_num_uops);
51507 else
51508 gcc_unreachable ();
51510 if (is_end_basic_block (insn_group))
51512 /* End of basic block is reached; do end-basic-block processing. */
51513 process_end_window ();
51514 return;
51518 /* Print the dispatch window, WINDOW_NUM, to FILE. */
51520 DEBUG_FUNCTION static void
51521 debug_dispatch_window_file (FILE *file, int window_num)
51523 dispatch_windows *list;
51524 int i;
51526 if (window_num == 0)
51527 list = dispatch_window_list;
51528 else
51529 list = dispatch_window_list1;
51531 fprintf (file, "Window #%d:\n", list->window_num);
51532 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
51533 list->num_insn, list->num_uops, list->window_size);
51534 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51535 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51537 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
51538 list->num_stores);
51539 fprintf (file, " insn info:\n");
51541 for (i = 0; i < MAX_INSN; i++)
51543 if (!list->window[i].insn)
51544 break;
51545 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51546 i, group_name[list->window[i].group],
51547 i, (void *)list->window[i].insn,
51548 i, list->window[i].path,
51549 i, list->window[i].byte_len,
51550 i, list->window[i].imm_bytes);
51554 /* Print to stdout a dispatch window. */
51556 DEBUG_FUNCTION void
51557 debug_dispatch_window (int window_num)
51559 debug_dispatch_window_file (stdout, window_num);
51562 /* Print INSN dispatch information to FILE. */
51564 DEBUG_FUNCTION static void
51565 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51567 int byte_len;
51568 enum insn_path path;
51569 enum dispatch_group group;
51570 int imm_size;
51571 int num_imm_operand;
51572 int num_imm32_operand;
51573 int num_imm64_operand;
51575 if (INSN_CODE (insn) < 0)
51576 return;
51578 byte_len = min_insn_size (insn);
51579 path = get_insn_path (insn);
51580 group = get_insn_group (insn);
51581 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51582 &num_imm64_operand);
51584 fprintf (file, " insn info:\n");
51585 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
51586 group_name[group], path, byte_len);
51587 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51588 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51591 /* Print to STDOUT the status of the ready list with respect to
51592 dispatch windows. */
51594 DEBUG_FUNCTION void
51595 debug_ready_dispatch (void)
51597 int i;
51598 int no_ready = number_in_ready ();
51600 fprintf (stdout, "Number of ready: %d\n", no_ready);
51602 for (i = 0; i < no_ready; i++)
51603 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51606 /* This routine is the driver of the dispatch scheduler. */
51608 static void
51609 do_dispatch (rtx_insn *insn, int mode)
51611 if (mode == DISPATCH_INIT)
51612 init_dispatch_sched ();
51613 else if (mode == ADD_TO_DISPATCH_WINDOW)
51614 add_to_dispatch_window (insn);
51617 /* Return TRUE if Dispatch Scheduling is supported. */
51619 static bool
51620 has_dispatch (rtx_insn *insn, int action)
51622 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51623 && flag_dispatch_scheduler)
51624 switch (action)
51626 default:
51627 return false;
51629 case IS_DISPATCH_ON:
51630 return true;
51631 break;
51633 case IS_CMP:
51634 return is_cmp (insn);
51636 case DISPATCH_VIOLATION:
51637 return dispatch_violation ();
51639 case FITS_DISPATCH_WINDOW:
51640 return fits_dispatch_window (insn);
51643 return false;
51646 /* Implementation of reassociation_width target hook used by
51647 reassoc phase to identify parallelism level in reassociated
51648 tree. The statement's tree_code is passed in OPC. The arguments' type
51649 is passed in MODE.
51651 Currently parallel reassociation is enabled for Atom
51652 processors only and we set reassociation width to be 2
51653 because Atom may issue up to 2 instructions per cycle.
51655 Return value should be fixed if parallel reassociation is
51656 enabled for other processors. */
51658 static int
51659 ix86_reassociation_width (unsigned int, machine_mode mode)
51661 /* Vector part. */
51662 if (VECTOR_MODE_P (mode))
51664 if (TARGET_VECTOR_PARALLEL_EXECUTION)
51665 return 2;
51666 else
51667 return 1;
51670 /* Scalar part. */
51671 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51672 return 2;
51673 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51674 return 2;
51675 else
51676 return 1;
51679 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
51680 place emms and femms instructions. */
51682 static machine_mode
51683 ix86_preferred_simd_mode (machine_mode mode)
51685 if (!TARGET_SSE)
51686 return word_mode;
51688 switch (mode)
51690 case QImode:
51691 return TARGET_AVX512BW ? V64QImode :
51692 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51693 case HImode:
51694 return TARGET_AVX512BW ? V32HImode :
51695 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51696 case SImode:
51697 return TARGET_AVX512F ? V16SImode :
51698 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51699 case DImode:
51700 return TARGET_AVX512F ? V8DImode :
51701 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51703 case SFmode:
51704 if (TARGET_AVX512F)
51705 return V16SFmode;
51706 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51707 return V8SFmode;
51708 else
51709 return V4SFmode;
51711 case DFmode:
51712 if (!TARGET_VECTORIZE_DOUBLE)
51713 return word_mode;
51714 else if (TARGET_AVX512F)
51715 return V8DFmode;
51716 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51717 return V4DFmode;
51718 else if (TARGET_SSE2)
51719 return V2DFmode;
51720 /* FALLTHRU */
51722 default:
51723 return word_mode;
51727 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
51728 vectors. If AVX512F is enabled then try vectorizing with 512bit,
51729 256bit and 128bit vectors. */
51731 static unsigned int
51732 ix86_autovectorize_vector_sizes (void)
51734 return TARGET_AVX512F ? 64 | 32 | 16 :
51735 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51740 /* Return class of registers which could be used for pseudo of MODE
51741 and of class RCLASS for spilling instead of memory. Return NO_REGS
51742 if it is not possible or not profitable. */
51743 static reg_class_t
51744 ix86_spill_class (reg_class_t rclass, machine_mode mode)
51746 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51747 && (mode == SImode || (TARGET_64BIT && mode == DImode))
51748 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51749 return ALL_SSE_REGS;
51750 return NO_REGS;
51753 /* Implement targetm.vectorize.init_cost. */
51755 static void *
51756 ix86_init_cost (struct loop *)
51758 unsigned *cost = XNEWVEC (unsigned, 3);
51759 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51760 return cost;
51763 /* Implement targetm.vectorize.add_stmt_cost. */
51765 static unsigned
51766 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51767 struct _stmt_vec_info *stmt_info, int misalign,
51768 enum vect_cost_model_location where)
51770 unsigned *cost = (unsigned *) data;
51771 unsigned retval = 0;
51773 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51774 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51776 /* Statements in an inner loop relative to the loop being
51777 vectorized are weighted more heavily. The value here is
51778 arbitrary and could potentially be improved with analysis. */
51779 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51780 count *= 50; /* FIXME. */
51782 retval = (unsigned) (count * stmt_cost);
51784 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
51785 for Silvermont as it has an out-of-order integer pipeline and can execute
51786 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
51787 if (TARGET_SILVERMONT || TARGET_INTEL)
51788 if (stmt_info && stmt_info->stmt)
51790 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51791 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51792 retval = (retval * 17) / 10;
51795 cost[where] += retval;
51797 return retval;
51800 /* Implement targetm.vectorize.finish_cost. */
51802 static void
51803 ix86_finish_cost (void *data, unsigned *prologue_cost,
51804 unsigned *body_cost, unsigned *epilogue_cost)
51806 unsigned *cost = (unsigned *) data;
51807 *prologue_cost = cost[vect_prologue];
51808 *body_cost = cost[vect_body];
51809 *epilogue_cost = cost[vect_epilogue];
51812 /* Implement targetm.vectorize.destroy_cost_data. */
51814 static void
51815 ix86_destroy_cost_data (void *data)
51817 free (data);
51820 /* Validate target specific memory model bits in VAL. */
51822 static unsigned HOST_WIDE_INT
51823 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51825 enum memmodel model = memmodel_from_int (val);
51826 bool strong;
51828 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51829 |MEMMODEL_MASK)
51830 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51832 warning (OPT_Winvalid_memory_model,
51833 "Unknown architecture specific memory model");
51834 return MEMMODEL_SEQ_CST;
51836 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
51837 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
51839 warning (OPT_Winvalid_memory_model,
51840 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51841 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51843 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
51845 warning (OPT_Winvalid_memory_model,
51846 "HLE_RELEASE not used with RELEASE or stronger memory model");
51847 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51849 return val;
51852 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51853 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51854 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
51855 or number of vecsize_mangle variants that should be emitted. */
51857 static int
51858 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51859 struct cgraph_simd_clone *clonei,
51860 tree base_type, int num)
51862 int ret = 1;
51864 if (clonei->simdlen
51865 && (clonei->simdlen < 2
51866 || clonei->simdlen > 16
51867 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51869 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51870 "unsupported simdlen %d", clonei->simdlen);
51871 return 0;
51874 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51875 if (TREE_CODE (ret_type) != VOID_TYPE)
51876 switch (TYPE_MODE (ret_type))
51878 case QImode:
51879 case HImode:
51880 case SImode:
51881 case DImode:
51882 case SFmode:
51883 case DFmode:
51884 /* case SCmode: */
51885 /* case DCmode: */
51886 break;
51887 default:
51888 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51889 "unsupported return type %qT for simd\n", ret_type);
51890 return 0;
51893 tree t;
51894 int i;
51896 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51897 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
51898 switch (TYPE_MODE (TREE_TYPE (t)))
51900 case QImode:
51901 case HImode:
51902 case SImode:
51903 case DImode:
51904 case SFmode:
51905 case DFmode:
51906 /* case SCmode: */
51907 /* case DCmode: */
51908 break;
51909 default:
51910 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51911 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51912 return 0;
51915 if (clonei->cilk_elemental)
51917 /* Parse the processor clause here. If not present, default to 'b'. */
51918 clonei->vecsize_mangle = 'b';
51920 else if (!TREE_PUBLIC (node->decl))
51922 /* If the function isn't exported, we can pick up just one ISA
51923 for the clones. */
51924 if (TARGET_AVX2)
51925 clonei->vecsize_mangle = 'd';
51926 else if (TARGET_AVX)
51927 clonei->vecsize_mangle = 'c';
51928 else
51929 clonei->vecsize_mangle = 'b';
51930 ret = 1;
51932 else
51934 clonei->vecsize_mangle = "bcd"[num];
51935 ret = 3;
51937 switch (clonei->vecsize_mangle)
51939 case 'b':
51940 clonei->vecsize_int = 128;
51941 clonei->vecsize_float = 128;
51942 break;
51943 case 'c':
51944 clonei->vecsize_int = 128;
51945 clonei->vecsize_float = 256;
51946 break;
51947 case 'd':
51948 clonei->vecsize_int = 256;
51949 clonei->vecsize_float = 256;
51950 break;
51952 if (clonei->simdlen == 0)
51954 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51955 clonei->simdlen = clonei->vecsize_int;
51956 else
51957 clonei->simdlen = clonei->vecsize_float;
51958 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51959 if (clonei->simdlen > 16)
51960 clonei->simdlen = 16;
51962 return ret;
51965 /* Add target attribute to SIMD clone NODE if needed. */
51967 static void
51968 ix86_simd_clone_adjust (struct cgraph_node *node)
51970 const char *str = NULL;
51971 gcc_assert (node->decl == cfun->decl);
51972 switch (node->simdclone->vecsize_mangle)
51974 case 'b':
51975 if (!TARGET_SSE2)
51976 str = "sse2";
51977 break;
51978 case 'c':
51979 if (!TARGET_AVX)
51980 str = "avx";
51981 break;
51982 case 'd':
51983 if (!TARGET_AVX2)
51984 str = "avx2";
51985 break;
51986 default:
51987 gcc_unreachable ();
51989 if (str == NULL)
51990 return;
51991 push_cfun (NULL);
51992 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51993 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51994 gcc_assert (ok);
51995 pop_cfun ();
51996 ix86_reset_previous_fndecl ();
51997 ix86_set_current_function (node->decl);
52000 /* If SIMD clone NODE can't be used in a vectorized loop
52001 in the current function, return -1, otherwise return the badness of using it
52002 (0 if it is most desirable from the vecsize_mangle point of view, 1
52003 slightly less desirable, etc.). */
52005 static int
52006 ix86_simd_clone_usable (struct cgraph_node *node)
52008 switch (node->simdclone->vecsize_mangle)
52010 case 'b':
52011 if (!TARGET_SSE2)
52012 return -1;
52013 if (!TARGET_AVX)
52014 return 0;
52015 return TARGET_AVX2 ? 2 : 1;
52016 case 'c':
52017 if (!TARGET_AVX)
52018 return -1;
52019 return TARGET_AVX2 ? 1 : 0;
52020 break;
52021 case 'd':
52022 if (!TARGET_AVX2)
52023 return -1;
52024 return 0;
52025 default:
52026 gcc_unreachable ();
52030 /* This function adjusts the unroll factor based on
52031 the hardware capabilities. For example, bdver3 has
52032 a loop buffer which makes unrolling of smaller
52033 loops less important. This function decides the
52034 unroll factor using the number of memory references
52035 (the value 32 is used) as a heuristic. */
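/* Illustrative example: a loop body containing 8 memory references
   gets 32 / 8 = 4 as the suggested unroll factor.  */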
52037 static unsigned
52038 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
52040 basic_block *bbs;
52041 rtx_insn *insn;
52042 unsigned i;
52043 unsigned mem_count = 0;
52045 if (!TARGET_ADJUST_UNROLL)
52046 return nunroll;
52048 /* Count the number of memory references within the loop body.
52049 This value determines the unrolling factor for bdver3 and bdver4
52050 architectures. */
52051 subrtx_iterator::array_type array;
52052 bbs = get_loop_body (loop);
52053 for (i = 0; i < loop->num_nodes; i++)
52054 FOR_BB_INSNS (bbs[i], insn)
52055 if (NONDEBUG_INSN_P (insn))
52056 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
52057 if (const_rtx x = *iter)
52058 if (MEM_P (x))
52060 machine_mode mode = GET_MODE (x);
52061 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
52062 if (n_words > 4)
52063 mem_count += 2;
52064 else
52065 mem_count += 1;
52067 free (bbs);
52069 if (mem_count && mem_count <= 32)
52070 return 32 / mem_count;
52072 return nunroll;
52076 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
52078 static bool
52079 ix86_float_exceptions_rounding_supported_p (void)
52081 /* For x87 floating point with standard excess precision handling,
52082 there is no adddf3 pattern (since x87 floating point only has
52083 XFmode operations) so the default hook implementation gets this
52084 wrong. */
52085 return TARGET_80387 || TARGET_SSE_MATH;
52088 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
52090 static void
52091 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
52093 if (!TARGET_80387 && !TARGET_SSE_MATH)
52094 return;
52095 tree exceptions_var = create_tmp_var (integer_type_node);
52096 if (TARGET_80387)
52098 tree fenv_index_type = build_index_type (size_int (6));
52099 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
52100 tree fenv_var = create_tmp_var (fenv_type);
52101 mark_addressable (fenv_var);
52102 tree fenv_ptr = build_pointer_type (fenv_type);
52103 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
52104 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
52105 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
52106 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
52107 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
52108 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
52109 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
52110 tree hold_fnclex = build_call_expr (fnclex, 0);
52111 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
52112 hold_fnclex);
52113 *clear = build_call_expr (fnclex, 0);
52114 tree sw_var = create_tmp_var (short_unsigned_type_node);
52115 tree fnstsw_call = build_call_expr (fnstsw, 0);
52116 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
52117 sw_var, fnstsw_call);
52118 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
52119 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
52120 exceptions_var, exceptions_x87);
52121 *update = build2 (COMPOUND_EXPR, integer_type_node,
52122 sw_mod, update_mod);
52123 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
52124 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
52126 if (TARGET_SSE_MATH)
52128 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
52129 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
52130 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
52131 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
52132 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
52133 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
52134 mxcsr_orig_var, stmxcsr_hold_call);
52135 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
52136 mxcsr_orig_var,
52137 build_int_cst (unsigned_type_node, 0x1f80));
52138 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
52139 build_int_cst (unsigned_type_node, 0xffffffc0));
52140 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
52141 mxcsr_mod_var, hold_mod_val);
52142 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52143 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
52144 hold_assign_orig, hold_assign_mod);
52145 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
52146 ldmxcsr_hold_call);
52147 if (*hold)
52148 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
52149 else
52150 *hold = hold_all;
52151 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52152 if (*clear)
52153 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
52154 ldmxcsr_clear_call);
52155 else
52156 *clear = ldmxcsr_clear_call;
52157 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
52158 tree exceptions_sse = fold_convert (integer_type_node,
52159 stmxcsr_update_call);
52160 if (*update)
52161 {
52162 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
52163 exceptions_var, exceptions_sse);
52164 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
52165 exceptions_var, exceptions_mod);
52166 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
52167 exceptions_assign);
52168 }
52169 else
52170 *update = build2 (MODIFY_EXPR, integer_type_node,
52171 exceptions_var, exceptions_sse);
52172 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
52173 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52174 ldmxcsr_update_call);
52175 }
52176 tree atomic_feraiseexcept
52177 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
52178 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
52179 1, exceptions_var);
52180 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52181 atomic_feraiseexcept_call);
52182 }
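/* Editor's note: a hedged user-level sketch (not part of i386.c) of the
   hold/clear/update sequence the SSE half of this hook expands to, written
   with the <xmmintrin.h> MXCSR intrinsics.  It assumes the x86 convention
   that the low six MXCSR status bits line up with the FE_* exception
   macros, so the collected bits can be handed to feraiseexcept.  */
#include <fenv.h>
#include <stdio.h>
#include <xmmintrin.h>

int
main (void)
{
  /* "hold": save MXCSR, then mask all exceptions (bits 7-12, 0x1f80)
     and clear the sticky status flags (the low six bits).  */
  unsigned int mxcsr_orig = _mm_getcsr ();
  unsigned int mxcsr_mod = (mxcsr_orig | 0x1f80) & 0xffffffc0;
  _mm_setcsr (mxcsr_mod);

  volatile double x = 1.0, y = 3.0;
  volatile double r = x / y;          /* quietly sets the inexact flag */

  /* "update": collect the raised flags, restore the caller's MXCSR,
     then re-raise the collected exceptions.  */
  unsigned int exceptions = _mm_getcsr () & 0x3f;
  _mm_setcsr (mxcsr_orig);
  feraiseexcept ((int) exceptions);

  printf ("r = %g, flags = %#x\n", r, exceptions);
  return 0;
}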
52184 /* Return the mode to be used for pointer bounds,
52185 or VOIDmode if bounds are not supported. */
52187 static enum machine_mode
52188 ix86_mpx_bound_mode ()
52189 {
52190 /* Do not support the Pointer Checker if MPX
52191 is not enabled. */
52192 if (!TARGET_MPX)
52193 {
52194 if (flag_check_pointer_bounds)
52195 warning (0, "Pointer Checker requires MPX support on this target."
52196 " Use the -mmpx option to enable MPX.");
52197 return VOIDmode;
52198 }
52200 return BNDmode;
52201 }
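/* Editor's note: usage example.  Pointer Bounds Checker instrumentation is
   requested with -fcheck-pointer-bounds and, on this target, also needs
   -mmpx so that BNDmode is available, e.g.:

       gcc -O2 -fcheck-pointer-bounds -mmpx test.c

   Without -mmpx the warning above is emitted and bounds get VOIDmode.  */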
52203 /* Return the constant used to statically initialize constant bounds.
52205 This function is used to create special bound values. For now
52206 only INIT bounds and NONE bounds are expected. More special
52207 values may be added later. */
52209 static tree
52210 ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
52211 {
52212 tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
52213 : build_zero_cst (pointer_sized_int_node);
52214 tree high = ub ? build_zero_cst (pointer_sized_int_node)
52215 : build_minus_one_cst (pointer_sized_int_node);
52217 /* This function is supposed to be used to create INIT and
52218 NONE bounds only. */
52219 gcc_assert ((lb == 0 && ub == -1)
52220 || (lb == -1 && ub == 0));
52222 return build_complex (NULL, low, high);
52223 }
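/* Editor's note: a small sketch (not part of i386.c) of the pair this
   helper produces, assuming the MPX convention (also visible in
   ix86_initialize_bounds below) that the upper bound is kept in
   one's-complement form.  For the two accepted inputs:

     INIT bounds: lb = 0,  ub = -1  ->  pair {0, ~(-1)}  = {0, 0}
     NONE bounds: lb = -1, ub = 0   ->  pair {-1, ~0}    = {-1, -1}  */
#include <stdio.h>

int
main (void)
{
  long long lb = 0, ub = -1;                 /* INIT bounds */
  long long low = lb ? -1LL : 0LL;
  long long high = ub ? 0LL : -1LL;          /* equals ~ub for these inputs */
  printf ("INIT -> {%lld, %lld}\n", low, high);
  return 0;
}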
52225 /* Generate a list of statements STMTS to initialize pointer bounds
52226 variable VAR with bounds LB and UB. Return the number of generated
52227 statements. */
52229 static int
52230 ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
52231 {
52232 tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
52233 tree lhs, modify, var_p;
52235 ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
52236 var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
52238 lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
52239 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
52240 append_to_statement_list (modify, stmts);
52242 lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
52243 build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
52244 TYPE_SIZE_UNIT (pointer_sized_int_node)));
52245 modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
52246 append_to_statement_list (modify, stmts);
52248 return 2;
52249 }
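/* Editor's note: a plain-C sketch (not part of i386.c) of the two stores
   the statements built above perform: the bounds variable is treated as
   two pointer-sized words, the first holding LB and the second ~UB.  */
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uintptr_t bounds[2];                 /* stands in for VAR of BNDmode */
  uintptr_t lb = 0x1000, ub = 0x1fff;  /* example object [0x1000, 0x1fff] */

  bounds[0] = lb;                      /* first word: lower bound */
  bounds[1] = ~ub;                     /* second word: one's-complemented UB */

  printf ("lb=%#lx stored_ub=%#lx\n",
          (unsigned long) bounds[0], (unsigned long) bounds[1]);
  return 0;
}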
52251 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
52252 /* For i386, a common symbol is local only for non-PIE binaries. For
52253 x86-64, a common symbol is local for non-PIE binaries or when the
52254 linker supports copy relocations in PIE binaries. */
52256 static bool
52257 ix86_binds_local_p (const_tree exp)
52258 {
52259 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
52260 (!flag_pic
52261 || (TARGET_64BIT
52262 && HAVE_LD_PIE_COPYRELOC != 0)));
52263 }
52264 #endif
52266 /* If MEM is in the form of [base+offset], extract the two parts of the
52267 address, store them in BASE and OFFSET, and return true; otherwise return false. */
52269 static bool
52270 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
52271 {
52272 rtx addr;
52274 gcc_assert (MEM_P (mem));
52276 addr = XEXP (mem, 0);
52278 if (GET_CODE (addr) == CONST)
52279 addr = XEXP (addr, 0);
52281 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
52282 {
52283 *base = addr;
52284 *offset = const0_rtx;
52285 return true;
52286 }
52288 if (GET_CODE (addr) == PLUS
52289 && (REG_P (XEXP (addr, 0))
52290 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
52291 && CONST_INT_P (XEXP (addr, 1)))
52292 {
52293 *base = XEXP (addr, 0);
52294 *offset = XEXP (addr, 1);
52295 return true;
52296 }
52298 return false;
52299 }
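/* Editor's note: for illustration, the address shapes accepted above are
   (mem (reg R)), (mem (symbol_ref S)) and
   (mem (plus (reg R) (const_int N))) or (mem (plus (symbol_ref S) (const_int N))),
   optionally wrapped in (const ...); anything else (index registers,
   scaled addresses, ...) makes the function return false.  */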
52301 /* Given OPERANDS of consecutive load/store, check whether we can merge
52302 them into a single move-multiple instruction. LOAD is true if they are
52303 load instructions. MODE is the mode of the memory operands. */
52305 bool
52306 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
52307 enum machine_mode mode)
52308 {
52309 HOST_WIDE_INT offval_1, offval_2, msize;
52310 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
52312 if (load)
52313 {
52314 mem_1 = operands[1];
52315 mem_2 = operands[3];
52316 reg_1 = operands[0];
52317 reg_2 = operands[2];
52318 }
52319 else
52320 {
52321 mem_1 = operands[0];
52322 mem_2 = operands[2];
52323 reg_1 = operands[1];
52324 reg_2 = operands[3];
52325 }
52327 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
52329 if (REGNO (reg_1) != REGNO (reg_2))
52330 return false;
52332 /* Check if the addresses are in the form of [base+offset]. */
52333 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
52334 return false;
52335 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
52336 return false;
52338 /* Check if the bases are the same. */
52339 if (!rtx_equal_p (base_1, base_2))
52340 return false;
52342 offval_1 = INTVAL (offset_1);
52343 offval_2 = INTVAL (offset_2);
52344 msize = GET_MODE_SIZE (mode);
52345 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
52346 if (offval_1 + msize != offval_2)
52347 return false;
52349 return true;
52350 }
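/* Editor's note: a standalone sketch (not part of i386.c) of the final
   adjacency test above: two memory operands can merge only when they share
   a base and the first operand's offset plus the access size equals the
   second operand's offset.  The helper name is hypothetical.  */
#include <stdbool.h>
#include <stdio.h>

static bool
toy_mems_adjacent (long off1, long off2, long msize)
{
  /* mem_1 must be the lower, immediately preceding access.  */
  return off1 + msize == off2;
}

int
main (void)
{
  printf ("%d\n", toy_mems_adjacent (8, 16, 8));   /* 1: 8(base) then 16(base) */
  printf ("%d\n", toy_mems_adjacent (8, 24, 8));   /* 0: gap between accesses */
  return 0;
}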
52352 /* Initialize the GCC target structure. */
52353 #undef TARGET_RETURN_IN_MEMORY
52354 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
52356 #undef TARGET_LEGITIMIZE_ADDRESS
52357 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
52359 #undef TARGET_ATTRIBUTE_TABLE
52360 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
52361 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
52362 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
52363 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52364 # undef TARGET_MERGE_DECL_ATTRIBUTES
52365 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
52366 #endif
52368 #undef TARGET_COMP_TYPE_ATTRIBUTES
52369 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
52371 #undef TARGET_INIT_BUILTINS
52372 #define TARGET_INIT_BUILTINS ix86_init_builtins
52373 #undef TARGET_BUILTIN_DECL
52374 #define TARGET_BUILTIN_DECL ix86_builtin_decl
52375 #undef TARGET_EXPAND_BUILTIN
52376 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
52378 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
52379 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
52380 ix86_builtin_vectorized_function
52382 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
52383 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
52385 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
52386 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
52388 #undef TARGET_VECTORIZE_BUILTIN_GATHER
52389 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
52391 #undef TARGET_BUILTIN_RECIPROCAL
52392 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
52394 #undef TARGET_ASM_FUNCTION_EPILOGUE
52395 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
52397 #undef TARGET_ENCODE_SECTION_INFO
52398 #ifndef SUBTARGET_ENCODE_SECTION_INFO
52399 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
52400 #else
52401 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
52402 #endif
52404 #undef TARGET_ASM_OPEN_PAREN
52405 #define TARGET_ASM_OPEN_PAREN ""
52406 #undef TARGET_ASM_CLOSE_PAREN
52407 #define TARGET_ASM_CLOSE_PAREN ""
52409 #undef TARGET_ASM_BYTE_OP
52410 #define TARGET_ASM_BYTE_OP ASM_BYTE
52412 #undef TARGET_ASM_ALIGNED_HI_OP
52413 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
52414 #undef TARGET_ASM_ALIGNED_SI_OP
52415 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
52416 #ifdef ASM_QUAD
52417 #undef TARGET_ASM_ALIGNED_DI_OP
52418 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
52419 #endif
52421 #undef TARGET_PROFILE_BEFORE_PROLOGUE
52422 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
52424 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
52425 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
52427 #undef TARGET_ASM_UNALIGNED_HI_OP
52428 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
52429 #undef TARGET_ASM_UNALIGNED_SI_OP
52430 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
52431 #undef TARGET_ASM_UNALIGNED_DI_OP
52432 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
52434 #undef TARGET_PRINT_OPERAND
52435 #define TARGET_PRINT_OPERAND ix86_print_operand
52436 #undef TARGET_PRINT_OPERAND_ADDRESS
52437 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
52438 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
52439 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
52440 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
52441 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
52443 #undef TARGET_SCHED_INIT_GLOBAL
52444 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
52445 #undef TARGET_SCHED_ADJUST_COST
52446 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
52447 #undef TARGET_SCHED_ISSUE_RATE
52448 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
52449 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
52450 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
52451 ia32_multipass_dfa_lookahead
52452 #undef TARGET_SCHED_MACRO_FUSION_P
52453 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
52454 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
52455 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
52457 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
52458 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
52460 #undef TARGET_MEMMODEL_CHECK
52461 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
52463 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
52464 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
52466 #ifdef HAVE_AS_TLS
52467 #undef TARGET_HAVE_TLS
52468 #define TARGET_HAVE_TLS true
52469 #endif
52470 #undef TARGET_CANNOT_FORCE_CONST_MEM
52471 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
52472 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
52473 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
52475 #undef TARGET_DELEGITIMIZE_ADDRESS
52476 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
52478 #undef TARGET_MS_BITFIELD_LAYOUT_P
52479 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
52481 #if TARGET_MACHO
52482 #undef TARGET_BINDS_LOCAL_P
52483 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
52484 #else
52485 #undef TARGET_BINDS_LOCAL_P
52486 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
52487 #endif
52488 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52489 #undef TARGET_BINDS_LOCAL_P
52490 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
52491 #endif
52493 #undef TARGET_ASM_OUTPUT_MI_THUNK
52494 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
52495 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
52496 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
52498 #undef TARGET_ASM_FILE_START
52499 #define TARGET_ASM_FILE_START x86_file_start
52501 #undef TARGET_OPTION_OVERRIDE
52502 #define TARGET_OPTION_OVERRIDE ix86_option_override
52504 #undef TARGET_REGISTER_MOVE_COST
52505 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
52506 #undef TARGET_MEMORY_MOVE_COST
52507 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
52508 #undef TARGET_RTX_COSTS
52509 #define TARGET_RTX_COSTS ix86_rtx_costs
52510 #undef TARGET_ADDRESS_COST
52511 #define TARGET_ADDRESS_COST ix86_address_cost
52513 #undef TARGET_FIXED_CONDITION_CODE_REGS
52514 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
52515 #undef TARGET_CC_MODES_COMPATIBLE
52516 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
52518 #undef TARGET_MACHINE_DEPENDENT_REORG
52519 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
52521 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
52522 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
52524 #undef TARGET_BUILD_BUILTIN_VA_LIST
52525 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
52527 #undef TARGET_FOLD_BUILTIN
52528 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
52530 #undef TARGET_COMPARE_VERSION_PRIORITY
52531 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
52533 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
52534 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
52535 ix86_generate_version_dispatcher_body
52537 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
52538 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
52539 ix86_get_function_versions_dispatcher
52541 #undef TARGET_ENUM_VA_LIST_P
52542 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
52544 #undef TARGET_FN_ABI_VA_LIST
52545 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
52547 #undef TARGET_CANONICAL_VA_LIST_TYPE
52548 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
52550 #undef TARGET_EXPAND_BUILTIN_VA_START
52551 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
52553 #undef TARGET_MD_ASM_ADJUST
52554 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
52556 #undef TARGET_PROMOTE_PROTOTYPES
52557 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
52558 #undef TARGET_SETUP_INCOMING_VARARGS
52559 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
52560 #undef TARGET_MUST_PASS_IN_STACK
52561 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
52562 #undef TARGET_FUNCTION_ARG_ADVANCE
52563 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
52564 #undef TARGET_FUNCTION_ARG
52565 #define TARGET_FUNCTION_ARG ix86_function_arg
52566 #undef TARGET_INIT_PIC_REG
52567 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
52568 #undef TARGET_USE_PSEUDO_PIC_REG
52569 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
52570 #undef TARGET_FUNCTION_ARG_BOUNDARY
52571 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
52572 #undef TARGET_PASS_BY_REFERENCE
52573 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
52574 #undef TARGET_INTERNAL_ARG_POINTER
52575 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
52576 #undef TARGET_UPDATE_STACK_BOUNDARY
52577 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
52578 #undef TARGET_GET_DRAP_RTX
52579 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
52580 #undef TARGET_STRICT_ARGUMENT_NAMING
52581 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
52582 #undef TARGET_STATIC_CHAIN
52583 #define TARGET_STATIC_CHAIN ix86_static_chain
52584 #undef TARGET_TRAMPOLINE_INIT
52585 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
52586 #undef TARGET_RETURN_POPS_ARGS
52587 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
52589 #undef TARGET_LEGITIMATE_COMBINED_INSN
52590 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
52592 #undef TARGET_ASAN_SHADOW_OFFSET
52593 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
52595 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
52596 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
52598 #undef TARGET_SCALAR_MODE_SUPPORTED_P
52599 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
52601 #undef TARGET_VECTOR_MODE_SUPPORTED_P
52602 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
52604 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
52605 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
52606 ix86_libgcc_floating_mode_supported_p
52608 #undef TARGET_C_MODE_FOR_SUFFIX
52609 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
52611 #ifdef HAVE_AS_TLS
52612 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
52613 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
52614 #endif
52616 #ifdef SUBTARGET_INSERT_ATTRIBUTES
52617 #undef TARGET_INSERT_ATTRIBUTES
52618 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
52619 #endif
52621 #undef TARGET_MANGLE_TYPE
52622 #define TARGET_MANGLE_TYPE ix86_mangle_type
52624 #if !TARGET_MACHO
52625 #undef TARGET_STACK_PROTECT_FAIL
52626 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
52627 #endif
52629 #undef TARGET_FUNCTION_VALUE
52630 #define TARGET_FUNCTION_VALUE ix86_function_value
52632 #undef TARGET_FUNCTION_VALUE_REGNO_P
52633 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
52635 #undef TARGET_PROMOTE_FUNCTION_MODE
52636 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
52638 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
52639 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
52641 #undef TARGET_MEMBER_TYPE_FORCES_BLK
52642 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
52644 #undef TARGET_INSTANTIATE_DECLS
52645 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
52647 #undef TARGET_SECONDARY_RELOAD
52648 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
52650 #undef TARGET_CLASS_MAX_NREGS
52651 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
52653 #undef TARGET_PREFERRED_RELOAD_CLASS
52654 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
52655 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
52656 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
52657 #undef TARGET_CLASS_LIKELY_SPILLED_P
52658 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
52660 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
52661 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
52662 ix86_builtin_vectorization_cost
52663 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
52664 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
52665 ix86_vectorize_vec_perm_const_ok
52666 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
52667 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
52668 ix86_preferred_simd_mode
52669 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
52670 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
52671 ix86_autovectorize_vector_sizes
52672 #undef TARGET_VECTORIZE_INIT_COST
52673 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
52674 #undef TARGET_VECTORIZE_ADD_STMT_COST
52675 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
52676 #undef TARGET_VECTORIZE_FINISH_COST
52677 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
52678 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
52679 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
52681 #undef TARGET_SET_CURRENT_FUNCTION
52682 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
52684 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
52685 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
52687 #undef TARGET_OPTION_SAVE
52688 #define TARGET_OPTION_SAVE ix86_function_specific_save
52690 #undef TARGET_OPTION_RESTORE
52691 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
52693 #undef TARGET_OPTION_POST_STREAM_IN
52694 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
52696 #undef TARGET_OPTION_PRINT
52697 #define TARGET_OPTION_PRINT ix86_function_specific_print
52699 #undef TARGET_OPTION_FUNCTION_VERSIONS
52700 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
52702 #undef TARGET_CAN_INLINE_P
52703 #define TARGET_CAN_INLINE_P ix86_can_inline_p
52705 #undef TARGET_EXPAND_TO_RTL_HOOK
52706 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
52708 #undef TARGET_LEGITIMATE_ADDRESS_P
52709 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
52711 #undef TARGET_LRA_P
52712 #define TARGET_LRA_P hook_bool_void_true
52714 #undef TARGET_REGISTER_PRIORITY
52715 #define TARGET_REGISTER_PRIORITY ix86_register_priority
52717 #undef TARGET_REGISTER_USAGE_LEVELING_P
52718 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
52720 #undef TARGET_LEGITIMATE_CONSTANT_P
52721 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52723 #undef TARGET_FRAME_POINTER_REQUIRED
52724 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52726 #undef TARGET_CAN_ELIMINATE
52727 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
52729 #undef TARGET_EXTRA_LIVE_ON_ENTRY
52730 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52732 #undef TARGET_ASM_CODE_END
52733 #define TARGET_ASM_CODE_END ix86_code_end
52735 #undef TARGET_CONDITIONAL_REGISTER_USAGE
52736 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
52738 #if TARGET_MACHO
52739 #undef TARGET_INIT_LIBFUNCS
52740 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52741 #endif
52743 #undef TARGET_LOOP_UNROLL_ADJUST
52744 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52746 #undef TARGET_SPILL_CLASS
52747 #define TARGET_SPILL_CLASS ix86_spill_class
52749 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52750 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52751 ix86_simd_clone_compute_vecsize_and_simdlen
52753 #undef TARGET_SIMD_CLONE_ADJUST
52754 #define TARGET_SIMD_CLONE_ADJUST \
52755 ix86_simd_clone_adjust
52757 #undef TARGET_SIMD_CLONE_USABLE
52758 #define TARGET_SIMD_CLONE_USABLE \
52759 ix86_simd_clone_usable
52761 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52762 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52763 ix86_float_exceptions_rounding_supported_p
52765 #undef TARGET_MODE_EMIT
52766 #define TARGET_MODE_EMIT ix86_emit_mode_set
52768 #undef TARGET_MODE_NEEDED
52769 #define TARGET_MODE_NEEDED ix86_mode_needed
52771 #undef TARGET_MODE_AFTER
52772 #define TARGET_MODE_AFTER ix86_mode_after
52774 #undef TARGET_MODE_ENTRY
52775 #define TARGET_MODE_ENTRY ix86_mode_entry
52777 #undef TARGET_MODE_EXIT
52778 #define TARGET_MODE_EXIT ix86_mode_exit
52780 #undef TARGET_MODE_PRIORITY
52781 #define TARGET_MODE_PRIORITY ix86_mode_priority
52783 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52784 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
52786 #undef TARGET_LOAD_BOUNDS_FOR_ARG
52787 #define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52789 #undef TARGET_STORE_BOUNDS_FOR_ARG
52790 #define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52792 #undef TARGET_LOAD_RETURNED_BOUNDS
52793 #define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52795 #undef TARGET_STORE_RETURNED_BOUNDS
52796 #define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52798 #undef TARGET_CHKP_BOUND_MODE
52799 #define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52801 #undef TARGET_BUILTIN_CHKP_FUNCTION
52802 #define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52804 #undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52805 #define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52807 #undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52808 #define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52810 #undef TARGET_CHKP_INITIALIZE_BOUNDS
52811 #define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52813 #undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52814 #define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52816 #undef TARGET_OFFLOAD_OPTIONS
52817 #define TARGET_OFFLOAD_OPTIONS \
52818 ix86_offload_options
52820 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52821 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
52823 struct gcc_target targetm = TARGET_INITIALIZER;
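/* Editor's note: a minimal sketch (not part of i386.c) of the pattern used
   in the table above.  target-def.h supplies a default for every TARGET_*
   hook macro; a port #undefs the ones it wants to change before the
   TARGET_INITIALIZER expansion fills targetm.  All names below are
   hypothetical stand-ins for that mechanism.  */
#include <stdio.h>

struct toy_target
{
  int (*issue_rate) (void);
  const char *asm_byte_op;
};

static int toy_issue_rate (void) { return 4; }

/* Defaults, as a generic "toy-target-def.h" might provide them.  */
#define TOY_ISSUE_RATE toy_issue_rate
#define TOY_ASM_BYTE_OP "\t.byte\t"

/* Port-specific override, mirroring the #undef/#define lines above.  */
#undef TOY_ASM_BYTE_OP
#define TOY_ASM_BYTE_OP "\t.b1\t"

#define TOY_INITIALIZER { TOY_ISSUE_RATE, TOY_ASM_BYTE_OP }

struct toy_target toy_targetm = TOY_INITIALIZER;

int
main (void)
{
  printf ("issue rate %d, byte op %s\n",
          toy_targetm.issue_rate (), toy_targetm.asm_byte_op);
  return 0;
}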
52825 #include "gt-i386.h"