Initial support for AVX-512{VL,BW,DQ}
[official-gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "wide-int.h"
82 #include "context.h"
83 #include "pass_manager.h"
84 #include "target-globals.h"
85 #include "tree-vectorizer.h"
86 #include "shrink-wrap.h"
87 #include "builtins.h"
89 static rtx legitimize_dllimport_symbol (rtx, bool);
90 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
91 static rtx legitimize_pe_coff_symbol (rtx, bool);
93 #ifndef CHECK_STACK_LIMIT
94 #define CHECK_STACK_LIMIT (-1)
95 #endif
97 /* Return index of given mode in mult and division cost tables. */
98 #define MODE_INDEX(mode) \
99 ((mode) == QImode ? 0 \
100 : (mode) == HImode ? 1 \
101 : (mode) == SImode ? 2 \
102 : (mode) == DImode ? 3 \
103 : 4)
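/* Illustrative usage (not part of the original source): with the
   definition above, MODE_INDEX (SImode) == 2, so a lookup such as
   cost->mult_init[MODE_INDEX (mode)] picks the SImode entry of a
   five-element cost array when mode == SImode, while any mode other
   than QI/HI/SI/DImode falls through to index 4, the "other" slot.  */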
105 /* Processor costs (relative to an add) */
106 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
107 #define COSTS_N_BYTES(N) ((N) * 2)
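/* Worked example (illustrative): under the assumption above that
   COSTS_N_INSNS (N) is (N) * 4, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   so when tuning for size a 2-byte add is costed like one "average"
   instruction, and the 3-byte lea below is costed slightly more.  */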
109 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
111 static stringop_algs ix86_size_memcpy[2] = {
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
113 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
114 static stringop_algs ix86_size_memset[2] = {
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
116 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
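/* How to read the stringop tables in this file (a sketch of the assumed
   stringop_algs layout from i386.h, not a quote of it): the first member
   is the algorithm used when the block size is unknown at compile time,
   and each following {max, alg, noalign} triple requests ALG for block
   sizes up to MAX bytes, with max == -1 covering everything larger.
   Element [0] of each two-element array is assumed to be the 32-bit
   variant and element [1] the 64-bit one.  ix86_size_memcpy and
   ix86_size_memset above therefore use rep movsb for every size, which
   is the smallest encoding.  */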
118 const
119 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
120 COSTS_N_BYTES (2), /* cost of an add instruction */
121 COSTS_N_BYTES (3), /* cost of a lea instruction */
122 COSTS_N_BYTES (2), /* variable shift costs */
123 COSTS_N_BYTES (3), /* constant shift costs */
124 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
125 COSTS_N_BYTES (3), /* HI */
126 COSTS_N_BYTES (3), /* SI */
127 COSTS_N_BYTES (3), /* DI */
128 COSTS_N_BYTES (5)}, /* other */
129 0, /* cost of multiply per each bit set */
130 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
131 COSTS_N_BYTES (3), /* HI */
132 COSTS_N_BYTES (3), /* SI */
133 COSTS_N_BYTES (3), /* DI */
134 COSTS_N_BYTES (5)}, /* other */
135 COSTS_N_BYTES (3), /* cost of movsx */
136 COSTS_N_BYTES (3), /* cost of movzx */
137 0, /* "large" insn */
138 2, /* MOVE_RATIO */
139 2, /* cost for loading QImode using movzbl */
140 {2, 2, 2}, /* cost of loading integer registers
141 in QImode, HImode and SImode.
142 Relative to reg-reg move (2). */
143 {2, 2, 2}, /* cost of storing integer registers */
144 2, /* cost of reg,reg fld/fst */
145 {2, 2, 2}, /* cost of loading fp registers
146 in SFmode, DFmode and XFmode */
147 {2, 2, 2}, /* cost of storing fp registers
148 in SFmode, DFmode and XFmode */
149 3, /* cost of moving MMX register */
150 {3, 3}, /* cost of loading MMX registers
151 in SImode and DImode */
152 {3, 3}, /* cost of storing MMX registers
153 in SImode and DImode */
154 3, /* cost of moving SSE register */
155 {3, 3, 3}, /* cost of loading SSE registers
156 in SImode, DImode and TImode */
157 {3, 3, 3}, /* cost of storing SSE registers
158 in SImode, DImode and TImode */
159 3, /* MMX or SSE register to integer */
160 0, /* size of l1 cache */
161 0, /* size of l2 cache */
162 0, /* size of prefetch block */
163 0, /* number of parallel prefetches */
164 2, /* Branch cost */
165 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
166 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
167 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
168 COSTS_N_BYTES (2), /* cost of FABS instruction. */
169 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
170 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
171 ix86_size_memcpy,
172 ix86_size_memset,
173 1, /* scalar_stmt_cost. */
174 1, /* scalar load_cost. */
175 1, /* scalar_store_cost. */
176 1, /* vec_stmt_cost. */
177 1, /* vec_to_scalar_cost. */
178 1, /* scalar_to_vec_cost. */
179 1, /* vec_align_load_cost. */
180 1, /* vec_unalign_load_cost. */
181 1, /* vec_store_cost. */
182 1, /* cond_taken_branch_cost. */
183 1, /* cond_not_taken_branch_cost. */
184 };
186 /* Processor costs (relative to an add) */
187 static stringop_algs i386_memcpy[2] = {
188 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
189 DUMMY_STRINGOP_ALGS};
190 static stringop_algs i386_memset[2] = {
191 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
192 DUMMY_STRINGOP_ALGS};
194 static const
195 struct processor_costs i386_cost = { /* 386 specific costs */
196 COSTS_N_INSNS (1), /* cost of an add instruction */
197 COSTS_N_INSNS (1), /* cost of a lea instruction */
198 COSTS_N_INSNS (3), /* variable shift costs */
199 COSTS_N_INSNS (2), /* constant shift costs */
200 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
201 COSTS_N_INSNS (6), /* HI */
202 COSTS_N_INSNS (6), /* SI */
203 COSTS_N_INSNS (6), /* DI */
204 COSTS_N_INSNS (6)}, /* other */
205 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
206 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
207 COSTS_N_INSNS (23), /* HI */
208 COSTS_N_INSNS (23), /* SI */
209 COSTS_N_INSNS (23), /* DI */
210 COSTS_N_INSNS (23)}, /* other */
211 COSTS_N_INSNS (3), /* cost of movsx */
212 COSTS_N_INSNS (2), /* cost of movzx */
213 15, /* "large" insn */
214 3, /* MOVE_RATIO */
215 4, /* cost for loading QImode using movzbl */
216 {2, 4, 2}, /* cost of loading integer registers
217 in QImode, HImode and SImode.
218 Relative to reg-reg move (2). */
219 {2, 4, 2}, /* cost of storing integer registers */
220 2, /* cost of reg,reg fld/fst */
221 {8, 8, 8}, /* cost of loading fp registers
222 in SFmode, DFmode and XFmode */
223 {8, 8, 8}, /* cost of storing fp registers
224 in SFmode, DFmode and XFmode */
225 2, /* cost of moving MMX register */
226 {4, 8}, /* cost of loading MMX registers
227 in SImode and DImode */
228 {4, 8}, /* cost of storing MMX registers
229 in SImode and DImode */
230 2, /* cost of moving SSE register */
231 {4, 8, 16}, /* cost of loading SSE registers
232 in SImode, DImode and TImode */
233 {4, 8, 16}, /* cost of storing SSE registers
234 in SImode, DImode and TImode */
235 3, /* MMX or SSE register to integer */
236 0, /* size of l1 cache */
237 0, /* size of l2 cache */
238 0, /* size of prefetch block */
239 0, /* number of parallel prefetches */
240 1, /* Branch cost */
241 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
242 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
243 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
244 COSTS_N_INSNS (22), /* cost of FABS instruction. */
245 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
246 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
247 i386_memcpy,
248 i386_memset,
249 1, /* scalar_stmt_cost. */
250 1, /* scalar load_cost. */
251 1, /* scalar_store_cost. */
252 1, /* vec_stmt_cost. */
253 1, /* vec_to_scalar_cost. */
254 1, /* scalar_to_vec_cost. */
255 1, /* vec_align_load_cost. */
256 2, /* vec_unalign_load_cost. */
257 1, /* vec_store_cost. */
258 3, /* cond_taken_branch_cost. */
259 1, /* cond_not_taken_branch_cost. */
260 };
262 static stringop_algs i486_memcpy[2] = {
263 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
264 DUMMY_STRINGOP_ALGS};
265 static stringop_algs i486_memset[2] = {
266 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
267 DUMMY_STRINGOP_ALGS};
269 static const
270 struct processor_costs i486_cost = { /* 486 specific costs */
271 COSTS_N_INSNS (1), /* cost of an add instruction */
272 COSTS_N_INSNS (1), /* cost of a lea instruction */
273 COSTS_N_INSNS (3), /* variable shift costs */
274 COSTS_N_INSNS (2), /* constant shift costs */
275 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
276 COSTS_N_INSNS (12), /* HI */
277 COSTS_N_INSNS (12), /* SI */
278 COSTS_N_INSNS (12), /* DI */
279 COSTS_N_INSNS (12)}, /* other */
280 1, /* cost of multiply per each bit set */
281 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
282 COSTS_N_INSNS (40), /* HI */
283 COSTS_N_INSNS (40), /* SI */
284 COSTS_N_INSNS (40), /* DI */
285 COSTS_N_INSNS (40)}, /* other */
286 COSTS_N_INSNS (3), /* cost of movsx */
287 COSTS_N_INSNS (2), /* cost of movzx */
288 15, /* "large" insn */
289 3, /* MOVE_RATIO */
290 4, /* cost for loading QImode using movzbl */
291 {2, 4, 2}, /* cost of loading integer registers
292 in QImode, HImode and SImode.
293 Relative to reg-reg move (2). */
294 {2, 4, 2}, /* cost of storing integer registers */
295 2, /* cost of reg,reg fld/fst */
296 {8, 8, 8}, /* cost of loading fp registers
297 in SFmode, DFmode and XFmode */
298 {8, 8, 8}, /* cost of storing fp registers
299 in SFmode, DFmode and XFmode */
300 2, /* cost of moving MMX register */
301 {4, 8}, /* cost of loading MMX registers
302 in SImode and DImode */
303 {4, 8}, /* cost of storing MMX registers
304 in SImode and DImode */
305 2, /* cost of moving SSE register */
306 {4, 8, 16}, /* cost of loading SSE registers
307 in SImode, DImode and TImode */
308 {4, 8, 16}, /* cost of storing SSE registers
309 in SImode, DImode and TImode */
310 3, /* MMX or SSE register to integer */
311 4, /* size of l1 cache. 486 has 8kB cache
312 shared for code and data, so 4kB is
313 not really precise. */
314 4, /* size of l2 cache */
315 0, /* size of prefetch block */
316 0, /* number of parallel prefetches */
317 1, /* Branch cost */
318 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
319 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
320 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
321 COSTS_N_INSNS (3), /* cost of FABS instruction. */
322 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
323 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
324 i486_memcpy,
325 i486_memset,
326 1, /* scalar_stmt_cost. */
327 1, /* scalar load_cost. */
328 1, /* scalar_store_cost. */
329 1, /* vec_stmt_cost. */
330 1, /* vec_to_scalar_cost. */
331 1, /* scalar_to_vec_cost. */
332 1, /* vec_align_load_cost. */
333 2, /* vec_unalign_load_cost. */
334 1, /* vec_store_cost. */
335 3, /* cond_taken_branch_cost. */
336 1, /* cond_not_taken_branch_cost. */
337 };
339 static stringop_algs pentium_memcpy[2] = {
340 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
341 DUMMY_STRINGOP_ALGS};
342 static stringop_algs pentium_memset[2] = {
343 {libcall, {{-1, rep_prefix_4_byte, false}}},
344 DUMMY_STRINGOP_ALGS};
346 static const
347 struct processor_costs pentium_cost = {
348 COSTS_N_INSNS (1), /* cost of an add instruction */
349 COSTS_N_INSNS (1), /* cost of a lea instruction */
350 COSTS_N_INSNS (4), /* variable shift costs */
351 COSTS_N_INSNS (1), /* constant shift costs */
352 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
353 COSTS_N_INSNS (11), /* HI */
354 COSTS_N_INSNS (11), /* SI */
355 COSTS_N_INSNS (11), /* DI */
356 COSTS_N_INSNS (11)}, /* other */
357 0, /* cost of multiply per each bit set */
358 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
359 COSTS_N_INSNS (25), /* HI */
360 COSTS_N_INSNS (25), /* SI */
361 COSTS_N_INSNS (25), /* DI */
362 COSTS_N_INSNS (25)}, /* other */
363 COSTS_N_INSNS (3), /* cost of movsx */
364 COSTS_N_INSNS (2), /* cost of movzx */
365 8, /* "large" insn */
366 6, /* MOVE_RATIO */
367 6, /* cost for loading QImode using movzbl */
368 {2, 4, 2}, /* cost of loading integer registers
369 in QImode, HImode and SImode.
370 Relative to reg-reg move (2). */
371 {2, 4, 2}, /* cost of storing integer registers */
372 2, /* cost of reg,reg fld/fst */
373 {2, 2, 6}, /* cost of loading fp registers
374 in SFmode, DFmode and XFmode */
375 {4, 4, 6}, /* cost of storing fp registers
376 in SFmode, DFmode and XFmode */
377 8, /* cost of moving MMX register */
378 {8, 8}, /* cost of loading MMX registers
379 in SImode and DImode */
380 {8, 8}, /* cost of storing MMX registers
381 in SImode and DImode */
382 2, /* cost of moving SSE register */
383 {4, 8, 16}, /* cost of loading SSE registers
384 in SImode, DImode and TImode */
385 {4, 8, 16}, /* cost of storing SSE registers
386 in SImode, DImode and TImode */
387 3, /* MMX or SSE register to integer */
388 8, /* size of l1 cache. */
389 8, /* size of l2 cache */
390 0, /* size of prefetch block */
391 0, /* number of parallel prefetches */
392 2, /* Branch cost */
393 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
394 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
395 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
396 COSTS_N_INSNS (1), /* cost of FABS instruction. */
397 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
398 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
399 pentium_memcpy,
400 pentium_memset,
401 1, /* scalar_stmt_cost. */
402 1, /* scalar load_cost. */
403 1, /* scalar_store_cost. */
404 1, /* vec_stmt_cost. */
405 1, /* vec_to_scalar_cost. */
406 1, /* scalar_to_vec_cost. */
407 1, /* vec_align_load_cost. */
408 2, /* vec_unalign_load_cost. */
409 1, /* vec_store_cost. */
410 3, /* cond_taken_branch_cost. */
411 1, /* cond_not_taken_branch_cost. */
412 };
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
415 (we ensure the alignment).  For small blocks an inline loop is still a
416 noticeable win; for bigger blocks either rep movsl or rep movsb is the way
417 to go.  Rep movsb apparently has a more expensive startup time in the CPU,
418 but after 4K the difference is down in the noise. */
419 static stringop_algs pentiumpro_memcpy[2] = {
420 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
421 {8192, rep_prefix_4_byte, false},
422 {-1, rep_prefix_1_byte, false}}},
423 DUMMY_STRINGOP_ALGS};
424 static stringop_algs pentiumpro_memset[2] = {
425 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
426 {8192, rep_prefix_4_byte, false},
427 {-1, libcall, false}}},
428 DUMMY_STRINGOP_ALGS};
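/* Illustrative reading of the 32-bit pentiumpro_memcpy entry above, under
   the table layout sketched earlier: blocks up to 128 bytes use an inline
   loop, up to 1024 bytes an unrolled loop, up to 8192 bytes rep movsl
   (rep_prefix_4_byte), and anything larger rep movsb (rep_prefix_1_byte),
   matching the tuning notes in the comment before these tables.  */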
429 static const
430 struct processor_costs pentiumpro_cost = {
431 COSTS_N_INSNS (1), /* cost of an add instruction */
432 COSTS_N_INSNS (1), /* cost of a lea instruction */
433 COSTS_N_INSNS (1), /* variable shift costs */
434 COSTS_N_INSNS (1), /* constant shift costs */
435 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
436 COSTS_N_INSNS (4), /* HI */
437 COSTS_N_INSNS (4), /* SI */
438 COSTS_N_INSNS (4), /* DI */
439 COSTS_N_INSNS (4)}, /* other */
440 0, /* cost of multiply per each bit set */
441 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
442 COSTS_N_INSNS (17), /* HI */
443 COSTS_N_INSNS (17), /* SI */
444 COSTS_N_INSNS (17), /* DI */
445 COSTS_N_INSNS (17)}, /* other */
446 COSTS_N_INSNS (1), /* cost of movsx */
447 COSTS_N_INSNS (1), /* cost of movzx */
448 8, /* "large" insn */
449 6, /* MOVE_RATIO */
450 2, /* cost for loading QImode using movzbl */
451 {4, 4, 4}, /* cost of loading integer registers
452 in QImode, HImode and SImode.
453 Relative to reg-reg move (2). */
454 {2, 2, 2}, /* cost of storing integer registers */
455 2, /* cost of reg,reg fld/fst */
456 {2, 2, 6}, /* cost of loading fp registers
457 in SFmode, DFmode and XFmode */
458 {4, 4, 6}, /* cost of storing fp registers
459 in SFmode, DFmode and XFmode */
460 2, /* cost of moving MMX register */
461 {2, 2}, /* cost of loading MMX registers
462 in SImode and DImode */
463 {2, 2}, /* cost of storing MMX registers
464 in SImode and DImode */
465 2, /* cost of moving SSE register */
466 {2, 2, 8}, /* cost of loading SSE registers
467 in SImode, DImode and TImode */
468 {2, 2, 8}, /* cost of storing SSE registers
469 in SImode, DImode and TImode */
470 3, /* MMX or SSE register to integer */
471 8, /* size of l1 cache. */
472 256, /* size of l2 cache */
473 32, /* size of prefetch block */
474 6, /* number of parallel prefetches */
475 2, /* Branch cost */
476 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
477 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
478 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
479 COSTS_N_INSNS (2), /* cost of FABS instruction. */
480 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
481 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
482 pentiumpro_memcpy,
483 pentiumpro_memset,
484 1, /* scalar_stmt_cost. */
485 1, /* scalar load_cost. */
486 1, /* scalar_store_cost. */
487 1, /* vec_stmt_cost. */
488 1, /* vec_to_scalar_cost. */
489 1, /* scalar_to_vec_cost. */
490 1, /* vec_align_load_cost. */
491 2, /* vec_unalign_load_cost. */
492 1, /* vec_store_cost. */
493 3, /* cond_taken_branch_cost. */
494 1, /* cond_not_taken_branch_cost. */
495 };
497 static stringop_algs geode_memcpy[2] = {
498 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
499 DUMMY_STRINGOP_ALGS};
500 static stringop_algs geode_memset[2] = {
501 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
502 DUMMY_STRINGOP_ALGS};
503 static const
504 struct processor_costs geode_cost = {
505 COSTS_N_INSNS (1), /* cost of an add instruction */
506 COSTS_N_INSNS (1), /* cost of a lea instruction */
507 COSTS_N_INSNS (2), /* variable shift costs */
508 COSTS_N_INSNS (1), /* constant shift costs */
509 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
510 COSTS_N_INSNS (4), /* HI */
511 COSTS_N_INSNS (7), /* SI */
512 COSTS_N_INSNS (7), /* DI */
513 COSTS_N_INSNS (7)}, /* other */
514 0, /* cost of multiply per each bit set */
515 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
516 COSTS_N_INSNS (23), /* HI */
517 COSTS_N_INSNS (39), /* SI */
518 COSTS_N_INSNS (39), /* DI */
519 COSTS_N_INSNS (39)}, /* other */
520 COSTS_N_INSNS (1), /* cost of movsx */
521 COSTS_N_INSNS (1), /* cost of movzx */
522 8, /* "large" insn */
523 4, /* MOVE_RATIO */
524 1, /* cost for loading QImode using movzbl */
525 {1, 1, 1}, /* cost of loading integer registers
526 in QImode, HImode and SImode.
527 Relative to reg-reg move (2). */
528 {1, 1, 1}, /* cost of storing integer registers */
529 1, /* cost of reg,reg fld/fst */
530 {1, 1, 1}, /* cost of loading fp registers
531 in SFmode, DFmode and XFmode */
532 {4, 6, 6}, /* cost of storing fp registers
533 in SFmode, DFmode and XFmode */
535 1, /* cost of moving MMX register */
536 {1, 1}, /* cost of loading MMX registers
537 in SImode and DImode */
538 {1, 1}, /* cost of storing MMX registers
539 in SImode and DImode */
540 1, /* cost of moving SSE register */
541 {1, 1, 1}, /* cost of loading SSE registers
542 in SImode, DImode and TImode */
543 {1, 1, 1}, /* cost of storing SSE registers
544 in SImode, DImode and TImode */
545 1, /* MMX or SSE register to integer */
546 64, /* size of l1 cache. */
547 128, /* size of l2 cache. */
548 32, /* size of prefetch block */
549 1, /* number of parallel prefetches */
550 1, /* Branch cost */
551 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
552 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
553 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
554 COSTS_N_INSNS (1), /* cost of FABS instruction. */
555 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
556 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
557 geode_memcpy,
558 geode_memset,
559 1, /* scalar_stmt_cost. */
560 1, /* scalar load_cost. */
561 1, /* scalar_store_cost. */
562 1, /* vec_stmt_cost. */
563 1, /* vec_to_scalar_cost. */
564 1, /* scalar_to_vec_cost. */
565 1, /* vec_align_load_cost. */
566 2, /* vec_unalign_load_cost. */
567 1, /* vec_store_cost. */
568 3, /* cond_taken_branch_cost. */
569 1, /* cond_not_taken_branch_cost. */
570 };
572 static stringop_algs k6_memcpy[2] = {
573 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
574 DUMMY_STRINGOP_ALGS};
575 static stringop_algs k6_memset[2] = {
576 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
577 DUMMY_STRINGOP_ALGS};
578 static const
579 struct processor_costs k6_cost = {
580 COSTS_N_INSNS (1), /* cost of an add instruction */
581 COSTS_N_INSNS (2), /* cost of a lea instruction */
582 COSTS_N_INSNS (1), /* variable shift costs */
583 COSTS_N_INSNS (1), /* constant shift costs */
584 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
585 COSTS_N_INSNS (3), /* HI */
586 COSTS_N_INSNS (3), /* SI */
587 COSTS_N_INSNS (3), /* DI */
588 COSTS_N_INSNS (3)}, /* other */
589 0, /* cost of multiply per each bit set */
590 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
591 COSTS_N_INSNS (18), /* HI */
592 COSTS_N_INSNS (18), /* SI */
593 COSTS_N_INSNS (18), /* DI */
594 COSTS_N_INSNS (18)}, /* other */
595 COSTS_N_INSNS (2), /* cost of movsx */
596 COSTS_N_INSNS (2), /* cost of movzx */
597 8, /* "large" insn */
598 4, /* MOVE_RATIO */
599 3, /* cost for loading QImode using movzbl */
600 {4, 5, 4}, /* cost of loading integer registers
601 in QImode, HImode and SImode.
602 Relative to reg-reg move (2). */
603 {2, 3, 2}, /* cost of storing integer registers */
604 4, /* cost of reg,reg fld/fst */
605 {6, 6, 6}, /* cost of loading fp registers
606 in SFmode, DFmode and XFmode */
607 {4, 4, 4}, /* cost of storing fp registers
608 in SFmode, DFmode and XFmode */
609 2, /* cost of moving MMX register */
610 {2, 2}, /* cost of loading MMX registers
611 in SImode and DImode */
612 {2, 2}, /* cost of storing MMX registers
613 in SImode and DImode */
614 2, /* cost of moving SSE register */
615 {2, 2, 8}, /* cost of loading SSE registers
616 in SImode, DImode and TImode */
617 {2, 2, 8}, /* cost of storing SSE registers
618 in SImode, DImode and TImode */
619 6, /* MMX or SSE register to integer */
620 32, /* size of l1 cache. */
621 32, /* size of l2 cache. Some models
622 have integrated l2 cache, but
623 optimizing for k6 is not important
624 enough to worry about that. */
625 32, /* size of prefetch block */
626 1, /* number of parallel prefetches */
627 1, /* Branch cost */
628 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
629 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
630 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
631 COSTS_N_INSNS (2), /* cost of FABS instruction. */
632 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
633 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
634 k6_memcpy,
635 k6_memset,
636 1, /* scalar_stmt_cost. */
637 1, /* scalar load_cost. */
638 1, /* scalar_store_cost. */
639 1, /* vec_stmt_cost. */
640 1, /* vec_to_scalar_cost. */
641 1, /* scalar_to_vec_cost. */
642 1, /* vec_align_load_cost. */
643 2, /* vec_unalign_load_cost. */
644 1, /* vec_store_cost. */
645 3, /* cond_taken_branch_cost. */
646 1, /* cond_not_taken_branch_cost. */
647 };
649 /* For some reason, Athlon deals better with the REP prefix (relative to
650 loops) than K8 does.  Alignment becomes important after 8 bytes for memcpy
651 and 128 bytes for memset. */
652 static stringop_algs athlon_memcpy[2] = {
653 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
654 DUMMY_STRINGOP_ALGS};
655 static stringop_algs athlon_memset[2] = {
656 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
657 DUMMY_STRINGOP_ALGS};
658 static const
659 struct processor_costs athlon_cost = {
660 COSTS_N_INSNS (1), /* cost of an add instruction */
661 COSTS_N_INSNS (2), /* cost of a lea instruction */
662 COSTS_N_INSNS (1), /* variable shift costs */
663 COSTS_N_INSNS (1), /* constant shift costs */
664 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
665 COSTS_N_INSNS (5), /* HI */
666 COSTS_N_INSNS (5), /* SI */
667 COSTS_N_INSNS (5), /* DI */
668 COSTS_N_INSNS (5)}, /* other */
669 0, /* cost of multiply per each bit set */
670 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
671 COSTS_N_INSNS (26), /* HI */
672 COSTS_N_INSNS (42), /* SI */
673 COSTS_N_INSNS (74), /* DI */
674 COSTS_N_INSNS (74)}, /* other */
675 COSTS_N_INSNS (1), /* cost of movsx */
676 COSTS_N_INSNS (1), /* cost of movzx */
677 8, /* "large" insn */
678 9, /* MOVE_RATIO */
679 4, /* cost for loading QImode using movzbl */
680 {3, 4, 3}, /* cost of loading integer registers
681 in QImode, HImode and SImode.
682 Relative to reg-reg move (2). */
683 {3, 4, 3}, /* cost of storing integer registers */
684 4, /* cost of reg,reg fld/fst */
685 {4, 4, 12}, /* cost of loading fp registers
686 in SFmode, DFmode and XFmode */
687 {6, 6, 8}, /* cost of storing fp registers
688 in SFmode, DFmode and XFmode */
689 2, /* cost of moving MMX register */
690 {4, 4}, /* cost of loading MMX registers
691 in SImode and DImode */
692 {4, 4}, /* cost of storing MMX registers
693 in SImode and DImode */
694 2, /* cost of moving SSE register */
695 {4, 4, 6}, /* cost of loading SSE registers
696 in SImode, DImode and TImode */
697 {4, 4, 5}, /* cost of storing SSE registers
698 in SImode, DImode and TImode */
699 5, /* MMX or SSE register to integer */
700 64, /* size of l1 cache. */
701 256, /* size of l2 cache. */
702 64, /* size of prefetch block */
703 6, /* number of parallel prefetches */
704 5, /* Branch cost */
705 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
706 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
707 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
708 COSTS_N_INSNS (2), /* cost of FABS instruction. */
709 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
710 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
711 athlon_memcpy,
712 athlon_memset,
713 1, /* scalar_stmt_cost. */
714 1, /* scalar load_cost. */
715 1, /* scalar_store_cost. */
716 1, /* vec_stmt_cost. */
717 1, /* vec_to_scalar_cost. */
718 1, /* scalar_to_vec_cost. */
719 1, /* vec_align_load_cost. */
720 2, /* vec_unalign_load_cost. */
721 1, /* vec_store_cost. */
722 3, /* cond_taken_branch_cost. */
723 1, /* cond_not_taken_branch_cost. */
724 };
726 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
727 small blocks it is better to use a loop.  For large blocks, a libcall can
728 use non-temporal accesses and beat inlined code considerably. */
729 static stringop_algs k8_memcpy[2] = {
730 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
731 {-1, rep_prefix_4_byte, false}}},
732 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
733 {-1, libcall, false}}}};
734 static stringop_algs k8_memset[2] = {
735 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
736 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
737 {libcall, {{48, unrolled_loop, false},
738 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
739 static const
740 struct processor_costs k8_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (2), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (4), /* HI */
747 COSTS_N_INSNS (3), /* SI */
748 COSTS_N_INSNS (4), /* DI */
749 COSTS_N_INSNS (5)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (26), /* HI */
753 COSTS_N_INSNS (42), /* SI */
754 COSTS_N_INSNS (74), /* DI */
755 COSTS_N_INSNS (74)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 8, /* "large" insn */
759 9, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {3, 4, 3}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {3, 4, 3}, /* cost of storing integer registers */
765 4, /* cost of reg,reg fld/fst */
766 {4, 4, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {6, 6, 8}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 2, /* cost of moving MMX register */
771 {3, 3}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {4, 4}, /* cost of storing MMX registers
774 in SImode and DImode */
775 2, /* cost of moving SSE register */
776 {4, 3, 6}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {4, 4, 5}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 5, /* MMX or SSE register to integer */
781 64, /* size of l1 cache. */
782 512, /* size of l2 cache. */
783 64, /* size of prefetch block */
784 /* New AMD processors never drop prefetches; if they cannot be performed
785 immediately, they are queued. We set number of simultaneous prefetches
786 to a large constant to reflect this (it probably is not a good idea not
787 to limit number of prefetches at all, as their execution also takes some
788 time). */
789 100, /* number of parallel prefetches */
790 3, /* Branch cost */
791 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
792 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
793 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
794 COSTS_N_INSNS (2), /* cost of FABS instruction. */
795 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
796 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
798 k8_memcpy,
799 k8_memset,
800 4, /* scalar_stmt_cost. */
801 2, /* scalar load_cost. */
802 2, /* scalar_store_cost. */
803 5, /* vec_stmt_cost. */
804 0, /* vec_to_scalar_cost. */
805 2, /* scalar_to_vec_cost. */
806 2, /* vec_align_load_cost. */
807 3, /* vec_unalign_load_cost. */
808 3, /* vec_store_cost. */
809 3, /* cond_taken_branch_cost. */
810 2, /* cond_not_taken_branch_cost. */
811 };
813 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
814 very small blocks it is better to use a loop.  For large blocks, a libcall
815 can use non-temporal accesses and beat inlined code considerably. */
816 static stringop_algs amdfam10_memcpy[2] = {
817 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
818 {-1, rep_prefix_4_byte, false}}},
819 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
820 {-1, libcall, false}}}};
821 static stringop_algs amdfam10_memset[2] = {
822 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
823 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
824 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
825 {-1, libcall, false}}}};
826 struct processor_costs amdfam10_cost = {
827 COSTS_N_INSNS (1), /* cost of an add instruction */
828 COSTS_N_INSNS (2), /* cost of a lea instruction */
829 COSTS_N_INSNS (1), /* variable shift costs */
830 COSTS_N_INSNS (1), /* constant shift costs */
831 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
832 COSTS_N_INSNS (4), /* HI */
833 COSTS_N_INSNS (3), /* SI */
834 COSTS_N_INSNS (4), /* DI */
835 COSTS_N_INSNS (5)}, /* other */
836 0, /* cost of multiply per each bit set */
837 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
838 COSTS_N_INSNS (35), /* HI */
839 COSTS_N_INSNS (51), /* SI */
840 COSTS_N_INSNS (83), /* DI */
841 COSTS_N_INSNS (83)}, /* other */
842 COSTS_N_INSNS (1), /* cost of movsx */
843 COSTS_N_INSNS (1), /* cost of movzx */
844 8, /* "large" insn */
845 9, /* MOVE_RATIO */
846 4, /* cost for loading QImode using movzbl */
847 {3, 4, 3}, /* cost of loading integer registers
848 in QImode, HImode and SImode.
849 Relative to reg-reg move (2). */
850 {3, 4, 3}, /* cost of storing integer registers */
851 4, /* cost of reg,reg fld/fst */
852 {4, 4, 12}, /* cost of loading fp registers
853 in SFmode, DFmode and XFmode */
854 {6, 6, 8}, /* cost of storing fp registers
855 in SFmode, DFmode and XFmode */
856 2, /* cost of moving MMX register */
857 {3, 3}, /* cost of loading MMX registers
858 in SImode and DImode */
859 {4, 4}, /* cost of storing MMX registers
860 in SImode and DImode */
861 2, /* cost of moving SSE register */
862 {4, 4, 3}, /* cost of loading SSE registers
863 in SImode, DImode and TImode */
864 {4, 4, 5}, /* cost of storing SSE registers
865 in SImode, DImode and TImode */
866 3, /* MMX or SSE register to integer */
867 /* On K8:
868 MOVD reg64, xmmreg Double FSTORE 4
869 MOVD reg32, xmmreg Double FSTORE 4
870 On AMDFAM10:
871 MOVD reg64, xmmreg Double FADD 3
872 1/1 1/1
873 MOVD reg32, xmmreg Double FADD 3
874 1/1 1/1 */
875 64, /* size of l1 cache. */
876 512, /* size of l2 cache. */
877 64, /* size of prefetch block */
878 /* New AMD processors never drop prefetches; if they cannot be performed
879 immediately, they are queued. We set number of simultaneous prefetches
880 to a large constant to reflect this (it probably is not a good idea not
881 to limit number of prefetches at all, as their execution also takes some
882 time). */
883 100, /* number of parallel prefetches */
884 2, /* Branch cost */
885 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
886 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
887 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
888 COSTS_N_INSNS (2), /* cost of FABS instruction. */
889 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
890 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
892 amdfam10_memcpy,
893 amdfam10_memset,
894 4, /* scalar_stmt_cost. */
895 2, /* scalar load_cost. */
896 2, /* scalar_store_cost. */
897 6, /* vec_stmt_cost. */
898 0, /* vec_to_scalar_cost. */
899 2, /* scalar_to_vec_cost. */
900 2, /* vec_align_load_cost. */
901 2, /* vec_unalign_load_cost. */
902 2, /* vec_store_cost. */
903 2, /* cond_taken_branch_cost. */
904 1, /* cond_not_taken_branch_cost. */
905 };
907 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
908 very small blocks it is better to use a loop.  For large blocks, a libcall
909 can use non-temporal accesses and beat inlined code considerably. */
910 static stringop_algs bdver1_memcpy[2] = {
911 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
912 {-1, rep_prefix_4_byte, false}}},
913 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
914 {-1, libcall, false}}}};
915 static stringop_algs bdver1_memset[2] = {
916 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
917 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
918 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
919 {-1, libcall, false}}}};
921 const struct processor_costs bdver1_cost = {
922 COSTS_N_INSNS (1), /* cost of an add instruction */
923 COSTS_N_INSNS (1), /* cost of a lea instruction */
924 COSTS_N_INSNS (1), /* variable shift costs */
925 COSTS_N_INSNS (1), /* constant shift costs */
926 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
927 COSTS_N_INSNS (4), /* HI */
928 COSTS_N_INSNS (4), /* SI */
929 COSTS_N_INSNS (6), /* DI */
930 COSTS_N_INSNS (6)}, /* other */
931 0, /* cost of multiply per each bit set */
932 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
933 COSTS_N_INSNS (35), /* HI */
934 COSTS_N_INSNS (51), /* SI */
935 COSTS_N_INSNS (83), /* DI */
936 COSTS_N_INSNS (83)}, /* other */
937 COSTS_N_INSNS (1), /* cost of movsx */
938 COSTS_N_INSNS (1), /* cost of movzx */
939 8, /* "large" insn */
940 9, /* MOVE_RATIO */
941 4, /* cost for loading QImode using movzbl */
942 {5, 5, 4}, /* cost of loading integer registers
943 in QImode, HImode and SImode.
944 Relative to reg-reg move (2). */
945 {4, 4, 4}, /* cost of storing integer registers */
946 2, /* cost of reg,reg fld/fst */
947 {5, 5, 12}, /* cost of loading fp registers
948 in SFmode, DFmode and XFmode */
949 {4, 4, 8}, /* cost of storing fp registers
950 in SFmode, DFmode and XFmode */
951 2, /* cost of moving MMX register */
952 {4, 4}, /* cost of loading MMX registers
953 in SImode and DImode */
954 {4, 4}, /* cost of storing MMX registers
955 in SImode and DImode */
956 2, /* cost of moving SSE register */
957 {4, 4, 4}, /* cost of loading SSE registers
958 in SImode, DImode and TImode */
959 {4, 4, 4}, /* cost of storing SSE registers
960 in SImode, DImode and TImode */
961 2, /* MMX or SSE register to integer */
962 /* On K8:
963 MOVD reg64, xmmreg Double FSTORE 4
964 MOVD reg32, xmmreg Double FSTORE 4
965 On AMDFAM10:
966 MOVD reg64, xmmreg Double FADD 3
967 1/1 1/1
968 MOVD reg32, xmmreg Double FADD 3
969 1/1 1/1 */
970 16, /* size of l1 cache. */
971 2048, /* size of l2 cache. */
972 64, /* size of prefetch block */
973 /* New AMD processors never drop prefetches; if they cannot be performed
974 immediately, they are queued. We set number of simultaneous prefetches
975 to a large constant to reflect this (it probably is not a good idea not
976 to limit number of prefetches at all, as their execution also takes some
977 time). */
978 100, /* number of parallel prefetches */
979 2, /* Branch cost */
980 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
981 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
982 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
983 COSTS_N_INSNS (2), /* cost of FABS instruction. */
984 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
985 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
987 bdver1_memcpy,
988 bdver1_memset,
989 6, /* scalar_stmt_cost. */
990 4, /* scalar load_cost. */
991 4, /* scalar_store_cost. */
992 6, /* vec_stmt_cost. */
993 0, /* vec_to_scalar_cost. */
994 2, /* scalar_to_vec_cost. */
995 4, /* vec_align_load_cost. */
996 4, /* vec_unalign_load_cost. */
997 4, /* vec_store_cost. */
998 2, /* cond_taken_branch_cost. */
999 1, /* cond_not_taken_branch_cost. */
1000 };
1002 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1003 very small blocks it is better to use a loop.  For large blocks, a libcall
1004 can use non-temporal accesses and beat inlined code considerably. */
1006 static stringop_algs bdver2_memcpy[2] = {
1007 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1008 {-1, rep_prefix_4_byte, false}}},
1009 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1010 {-1, libcall, false}}}};
1011 static stringop_algs bdver2_memset[2] = {
1012 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1013 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1014 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1015 {-1, libcall, false}}}};
1017 const struct processor_costs bdver2_cost = {
1018 COSTS_N_INSNS (1), /* cost of an add instruction */
1019 COSTS_N_INSNS (1), /* cost of a lea instruction */
1020 COSTS_N_INSNS (1), /* variable shift costs */
1021 COSTS_N_INSNS (1), /* constant shift costs */
1022 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1023 COSTS_N_INSNS (4), /* HI */
1024 COSTS_N_INSNS (4), /* SI */
1025 COSTS_N_INSNS (6), /* DI */
1026 COSTS_N_INSNS (6)}, /* other */
1027 0, /* cost of multiply per each bit set */
1028 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1029 COSTS_N_INSNS (35), /* HI */
1030 COSTS_N_INSNS (51), /* SI */
1031 COSTS_N_INSNS (83), /* DI */
1032 COSTS_N_INSNS (83)}, /* other */
1033 COSTS_N_INSNS (1), /* cost of movsx */
1034 COSTS_N_INSNS (1), /* cost of movzx */
1035 8, /* "large" insn */
1036 9, /* MOVE_RATIO */
1037 4, /* cost for loading QImode using movzbl */
1038 {5, 5, 4}, /* cost of loading integer registers
1039 in QImode, HImode and SImode.
1040 Relative to reg-reg move (2). */
1041 {4, 4, 4}, /* cost of storing integer registers */
1042 2, /* cost of reg,reg fld/fst */
1043 {5, 5, 12}, /* cost of loading fp registers
1044 in SFmode, DFmode and XFmode */
1045 {4, 4, 8}, /* cost of storing fp registers
1046 in SFmode, DFmode and XFmode */
1047 2, /* cost of moving MMX register */
1048 {4, 4}, /* cost of loading MMX registers
1049 in SImode and DImode */
1050 {4, 4}, /* cost of storing MMX registers
1051 in SImode and DImode */
1052 2, /* cost of moving SSE register */
1053 {4, 4, 4}, /* cost of loading SSE registers
1054 in SImode, DImode and TImode */
1055 {4, 4, 4}, /* cost of storing SSE registers
1056 in SImode, DImode and TImode */
1057 2, /* MMX or SSE register to integer */
1058 /* On K8:
1059 MOVD reg64, xmmreg Double FSTORE 4
1060 MOVD reg32, xmmreg Double FSTORE 4
1061 On AMDFAM10:
1062 MOVD reg64, xmmreg Double FADD 3
1063 1/1 1/1
1064 MOVD reg32, xmmreg Double FADD 3
1065 1/1 1/1 */
1066 16, /* size of l1 cache. */
1067 2048, /* size of l2 cache. */
1068 64, /* size of prefetch block */
1069 /* New AMD processors never drop prefetches; if they cannot be performed
1070 immediately, they are queued. We set number of simultaneous prefetches
1071 to a large constant to reflect this (it probably is not a good idea not
1072 to limit number of prefetches at all, as their execution also takes some
1073 time). */
1074 100, /* number of parallel prefetches */
1075 2, /* Branch cost */
1076 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1077 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1078 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1079 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1080 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1081 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1083 bdver2_memcpy,
1084 bdver2_memset,
1085 6, /* scalar_stmt_cost. */
1086 4, /* scalar load_cost. */
1087 4, /* scalar_store_cost. */
1088 6, /* vec_stmt_cost. */
1089 0, /* vec_to_scalar_cost. */
1090 2, /* scalar_to_vec_cost. */
1091 4, /* vec_align_load_cost. */
1092 4, /* vec_unalign_load_cost. */
1093 4, /* vec_store_cost. */
1094 2, /* cond_taken_branch_cost. */
1095 1, /* cond_not_taken_branch_cost. */
1096 };
1099 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1100 very small blocks it is better to use a loop.  For large blocks, a libcall
1101 can use non-temporal accesses and beat inlined code considerably. */
1102 static stringop_algs bdver3_memcpy[2] = {
1103 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1104 {-1, rep_prefix_4_byte, false}}},
1105 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1106 {-1, libcall, false}}}};
1107 static stringop_algs bdver3_memset[2] = {
1108 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1109 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1110 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1111 {-1, libcall, false}}}};
1112 struct processor_costs bdver3_cost = {
1113 COSTS_N_INSNS (1), /* cost of an add instruction */
1114 COSTS_N_INSNS (1), /* cost of a lea instruction */
1115 COSTS_N_INSNS (1), /* variable shift costs */
1116 COSTS_N_INSNS (1), /* constant shift costs */
1117 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1118 COSTS_N_INSNS (4), /* HI */
1119 COSTS_N_INSNS (4), /* SI */
1120 COSTS_N_INSNS (6), /* DI */
1121 COSTS_N_INSNS (6)}, /* other */
1122 0, /* cost of multiply per each bit set */
1123 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1124 COSTS_N_INSNS (35), /* HI */
1125 COSTS_N_INSNS (51), /* SI */
1126 COSTS_N_INSNS (83), /* DI */
1127 COSTS_N_INSNS (83)}, /* other */
1128 COSTS_N_INSNS (1), /* cost of movsx */
1129 COSTS_N_INSNS (1), /* cost of movzx */
1130 8, /* "large" insn */
1131 9, /* MOVE_RATIO */
1132 4, /* cost for loading QImode using movzbl */
1133 {5, 5, 4}, /* cost of loading integer registers
1134 in QImode, HImode and SImode.
1135 Relative to reg-reg move (2). */
1136 {4, 4, 4}, /* cost of storing integer registers */
1137 2, /* cost of reg,reg fld/fst */
1138 {5, 5, 12}, /* cost of loading fp registers
1139 in SFmode, DFmode and XFmode */
1140 {4, 4, 8}, /* cost of storing fp registers
1141 in SFmode, DFmode and XFmode */
1142 2, /* cost of moving MMX register */
1143 {4, 4}, /* cost of loading MMX registers
1144 in SImode and DImode */
1145 {4, 4}, /* cost of storing MMX registers
1146 in SImode and DImode */
1147 2, /* cost of moving SSE register */
1148 {4, 4, 4}, /* cost of loading SSE registers
1149 in SImode, DImode and TImode */
1150 {4, 4, 4}, /* cost of storing SSE registers
1151 in SImode, DImode and TImode */
1152 2, /* MMX or SSE register to integer */
1153 16, /* size of l1 cache. */
1154 2048, /* size of l2 cache. */
1155 64, /* size of prefetch block */
1156 /* New AMD processors never drop prefetches; if they cannot be performed
1157 immediately, they are queued. We set number of simultaneous prefetches
1158 to a large constant to reflect this (it probably is not a good idea not
1159 to limit number of prefetches at all, as their execution also takes some
1160 time). */
1161 100, /* number of parallel prefetches */
1162 2, /* Branch cost */
1163 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1164 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1165 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1166 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1167 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1168 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1170 bdver3_memcpy,
1171 bdver3_memset,
1172 6, /* scalar_stmt_cost. */
1173 4, /* scalar load_cost. */
1174 4, /* scalar_store_cost. */
1175 6, /* vec_stmt_cost. */
1176 0, /* vec_to_scalar_cost. */
1177 2, /* scalar_to_vec_cost. */
1178 4, /* vec_align_load_cost. */
1179 4, /* vec_unalign_load_cost. */
1180 4, /* vec_store_cost. */
1181 2, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
1183 };
1185 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1186 very small blocks it is better to use a loop.  For large blocks, a libcall
1187 can use non-temporal accesses and beat inlined code considerably. */
1188 static stringop_algs bdver4_memcpy[2] = {
1189 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1190 {-1, rep_prefix_4_byte, false}}},
1191 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1192 {-1, libcall, false}}}};
1193 static stringop_algs bdver4_memset[2] = {
1194 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1195 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1196 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1197 {-1, libcall, false}}}};
1198 struct processor_costs bdver4_cost = {
1199 COSTS_N_INSNS (1), /* cost of an add instruction */
1200 COSTS_N_INSNS (1), /* cost of a lea instruction */
1201 COSTS_N_INSNS (1), /* variable shift costs */
1202 COSTS_N_INSNS (1), /* constant shift costs */
1203 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1204 COSTS_N_INSNS (4), /* HI */
1205 COSTS_N_INSNS (4), /* SI */
1206 COSTS_N_INSNS (6), /* DI */
1207 COSTS_N_INSNS (6)}, /* other */
1208 0, /* cost of multiply per each bit set */
1209 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1210 COSTS_N_INSNS (35), /* HI */
1211 COSTS_N_INSNS (51), /* SI */
1212 COSTS_N_INSNS (83), /* DI */
1213 COSTS_N_INSNS (83)}, /* other */
1214 COSTS_N_INSNS (1), /* cost of movsx */
1215 COSTS_N_INSNS (1), /* cost of movzx */
1216 8, /* "large" insn */
1217 9, /* MOVE_RATIO */
1218 4, /* cost for loading QImode using movzbl */
1219 {5, 5, 4}, /* cost of loading integer registers
1220 in QImode, HImode and SImode.
1221 Relative to reg-reg move (2). */
1222 {4, 4, 4}, /* cost of storing integer registers */
1223 2, /* cost of reg,reg fld/fst */
1224 {5, 5, 12}, /* cost of loading fp registers
1225 in SFmode, DFmode and XFmode */
1226 {4, 4, 8}, /* cost of storing fp registers
1227 in SFmode, DFmode and XFmode */
1228 2, /* cost of moving MMX register */
1229 {4, 4}, /* cost of loading MMX registers
1230 in SImode and DImode */
1231 {4, 4}, /* cost of storing MMX registers
1232 in SImode and DImode */
1233 2, /* cost of moving SSE register */
1234 {4, 4, 4}, /* cost of loading SSE registers
1235 in SImode, DImode and TImode */
1236 {4, 4, 4}, /* cost of storing SSE registers
1237 in SImode, DImode and TImode */
1238 2, /* MMX or SSE register to integer */
1239 16, /* size of l1 cache. */
1240 2048, /* size of l2 cache. */
1241 64, /* size of prefetch block */
1242 /* New AMD processors never drop prefetches; if they cannot be performed
1243 immediately, they are queued. We set number of simultaneous prefetches
1244 to a large constant to reflect this (it probably is not a good idea not
1245 to limit number of prefetches at all, as their execution also takes some
1246 time). */
1247 100, /* number of parallel prefetches */
1248 2, /* Branch cost */
1249 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1250 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1251 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1252 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1253 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1254 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1256 bdver4_memcpy,
1257 bdver4_memset,
1258 6, /* scalar_stmt_cost. */
1259 4, /* scalar load_cost. */
1260 4, /* scalar_store_cost. */
1261 6, /* vec_stmt_cost. */
1262 0, /* vec_to_scalar_cost. */
1263 2, /* scalar_to_vec_cost. */
1264 4, /* vec_align_load_cost. */
1265 4, /* vec_unalign_load_cost. */
1266 4, /* vec_store_cost. */
1267 2, /* cond_taken_branch_cost. */
1268 1, /* cond_not_taken_branch_cost. */
1269 };
1271 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1272 very small blocks it is better to use a loop.  For large blocks, a libcall
1273 can use non-temporal accesses and beat inlined code considerably. */
1274 static stringop_algs btver1_memcpy[2] = {
1275 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1276 {-1, rep_prefix_4_byte, false}}},
1277 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1278 {-1, libcall, false}}}};
1279 static stringop_algs btver1_memset[2] = {
1280 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1281 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1282 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1283 {-1, libcall, false}}}};
1284 const struct processor_costs btver1_cost = {
1285 COSTS_N_INSNS (1), /* cost of an add instruction */
1286 COSTS_N_INSNS (2), /* cost of a lea instruction */
1287 COSTS_N_INSNS (1), /* variable shift costs */
1288 COSTS_N_INSNS (1), /* constant shift costs */
1289 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1290 COSTS_N_INSNS (4), /* HI */
1291 COSTS_N_INSNS (3), /* SI */
1292 COSTS_N_INSNS (4), /* DI */
1293 COSTS_N_INSNS (5)}, /* other */
1294 0, /* cost of multiply per each bit set */
1295 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1296 COSTS_N_INSNS (35), /* HI */
1297 COSTS_N_INSNS (51), /* SI */
1298 COSTS_N_INSNS (83), /* DI */
1299 COSTS_N_INSNS (83)}, /* other */
1300 COSTS_N_INSNS (1), /* cost of movsx */
1301 COSTS_N_INSNS (1), /* cost of movzx */
1302 8, /* "large" insn */
1303 9, /* MOVE_RATIO */
1304 4, /* cost for loading QImode using movzbl */
1305 {3, 4, 3}, /* cost of loading integer registers
1306 in QImode, HImode and SImode.
1307 Relative to reg-reg move (2). */
1308 {3, 4, 3}, /* cost of storing integer registers */
1309 4, /* cost of reg,reg fld/fst */
1310 {4, 4, 12}, /* cost of loading fp registers
1311 in SFmode, DFmode and XFmode */
1312 {6, 6, 8}, /* cost of storing fp registers
1313 in SFmode, DFmode and XFmode */
1314 2, /* cost of moving MMX register */
1315 {3, 3}, /* cost of loading MMX registers
1316 in SImode and DImode */
1317 {4, 4}, /* cost of storing MMX registers
1318 in SImode and DImode */
1319 2, /* cost of moving SSE register */
1320 {4, 4, 3}, /* cost of loading SSE registers
1321 in SImode, DImode and TImode */
1322 {4, 4, 5}, /* cost of storing SSE registers
1323 in SImode, DImode and TImode */
1324 3, /* MMX or SSE register to integer */
1325 /* On K8:
1326 MOVD reg64, xmmreg Double FSTORE 4
1327 MOVD reg32, xmmreg Double FSTORE 4
1328 On AMDFAM10:
1329 MOVD reg64, xmmreg Double FADD 3
1330 1/1 1/1
1331 MOVD reg32, xmmreg Double FADD 3
1332 1/1 1/1 */
1333 32, /* size of l1 cache. */
1334 512, /* size of l2 cache. */
1335 64, /* size of prefetch block */
1336 100, /* number of parallel prefetches */
1337 2, /* Branch cost */
1338 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1339 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1340 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1341 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1342 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1343 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1345 btver1_memcpy,
1346 btver1_memset,
1347 4, /* scalar_stmt_cost. */
1348 2, /* scalar load_cost. */
1349 2, /* scalar_store_cost. */
1350 6, /* vec_stmt_cost. */
1351 0, /* vec_to_scalar_cost. */
1352 2, /* scalar_to_vec_cost. */
1353 2, /* vec_align_load_cost. */
1354 2, /* vec_unalign_load_cost. */
1355 2, /* vec_store_cost. */
1356 2, /* cond_taken_branch_cost. */
1357 1, /* cond_not_taken_branch_cost. */
1358 };
1360 static stringop_algs btver2_memcpy[2] = {
1361 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1362 {-1, rep_prefix_4_byte, false}}},
1363 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1364 {-1, libcall, false}}}};
1365 static stringop_algs btver2_memset[2] = {
1366 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1367 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1368 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1369 {-1, libcall, false}}}};
1370 const struct processor_costs btver2_cost = {
1371 COSTS_N_INSNS (1), /* cost of an add instruction */
1372 COSTS_N_INSNS (2), /* cost of a lea instruction */
1373 COSTS_N_INSNS (1), /* variable shift costs */
1374 COSTS_N_INSNS (1), /* constant shift costs */
1375 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1376 COSTS_N_INSNS (4), /* HI */
1377 COSTS_N_INSNS (3), /* SI */
1378 COSTS_N_INSNS (4), /* DI */
1379 COSTS_N_INSNS (5)}, /* other */
1380 0, /* cost of multiply per each bit set */
1381 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1382 COSTS_N_INSNS (35), /* HI */
1383 COSTS_N_INSNS (51), /* SI */
1384 COSTS_N_INSNS (83), /* DI */
1385 COSTS_N_INSNS (83)}, /* other */
1386 COSTS_N_INSNS (1), /* cost of movsx */
1387 COSTS_N_INSNS (1), /* cost of movzx */
1388 8, /* "large" insn */
1389 9, /* MOVE_RATIO */
1390 4, /* cost for loading QImode using movzbl */
1391 {3, 4, 3}, /* cost of loading integer registers
1392 in QImode, HImode and SImode.
1393 Relative to reg-reg move (2). */
1394 {3, 4, 3}, /* cost of storing integer registers */
1395 4, /* cost of reg,reg fld/fst */
1396 {4, 4, 12}, /* cost of loading fp registers
1397 in SFmode, DFmode and XFmode */
1398 {6, 6, 8}, /* cost of storing fp registers
1399 in SFmode, DFmode and XFmode */
1400 2, /* cost of moving MMX register */
1401 {3, 3}, /* cost of loading MMX registers
1402 in SImode and DImode */
1403 {4, 4}, /* cost of storing MMX registers
1404 in SImode and DImode */
1405 2, /* cost of moving SSE register */
1406 {4, 4, 3}, /* cost of loading SSE registers
1407 in SImode, DImode and TImode */
1408 {4, 4, 5}, /* cost of storing SSE registers
1409 in SImode, DImode and TImode */
1410 3, /* MMX or SSE register to integer */
1411 /* On K8:
1412 MOVD reg64, xmmreg Double FSTORE 4
1413 MOVD reg32, xmmreg Double FSTORE 4
1414 On AMDFAM10:
1415 MOVD reg64, xmmreg Double FADD 3
1416 1/1 1/1
1417 MOVD reg32, xmmreg Double FADD 3
1418 1/1 1/1 */
1419 32, /* size of l1 cache. */
1420 2048, /* size of l2 cache. */
1421 64, /* size of prefetch block */
1422 100, /* number of parallel prefetches */
1423 2, /* Branch cost */
1424 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1425 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1426 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1427 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1428 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1429 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1430 btver2_memcpy,
1431 btver2_memset,
1432 4, /* scalar_stmt_cost. */
1433 2, /* scalar load_cost. */
1434 2, /* scalar_store_cost. */
1435 6, /* vec_stmt_cost. */
1436 0, /* vec_to_scalar_cost. */
1437 2, /* scalar_to_vec_cost. */
1438 2, /* vec_align_load_cost. */
1439 2, /* vec_unalign_load_cost. */
1440 2, /* vec_store_cost. */
1441 2, /* cond_taken_branch_cost. */
1442 1, /* cond_not_taken_branch_cost. */
1445 static stringop_algs pentium4_memcpy[2] = {
1446 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1447 DUMMY_STRINGOP_ALGS};
1448 static stringop_algs pentium4_memset[2] = {
1449 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1450 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1451 DUMMY_STRINGOP_ALGS};
1453 static const
1454 struct processor_costs pentium4_cost = {
1455 COSTS_N_INSNS (1), /* cost of an add instruction */
1456 COSTS_N_INSNS (3), /* cost of a lea instruction */
1457 COSTS_N_INSNS (4), /* variable shift costs */
1458 COSTS_N_INSNS (4), /* constant shift costs */
1459 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1460 COSTS_N_INSNS (15), /* HI */
1461 COSTS_N_INSNS (15), /* SI */
1462 COSTS_N_INSNS (15), /* DI */
1463 COSTS_N_INSNS (15)}, /* other */
1464 0, /* cost of multiply per each bit set */
1465 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1466 COSTS_N_INSNS (56), /* HI */
1467 COSTS_N_INSNS (56), /* SI */
1468 COSTS_N_INSNS (56), /* DI */
1469 COSTS_N_INSNS (56)}, /* other */
1470 COSTS_N_INSNS (1), /* cost of movsx */
1471 COSTS_N_INSNS (1), /* cost of movzx */
1472 16, /* "large" insn */
1473 6, /* MOVE_RATIO */
1474 2, /* cost for loading QImode using movzbl */
1475 {4, 5, 4}, /* cost of loading integer registers
1476 in QImode, HImode and SImode.
1477 Relative to reg-reg move (2). */
1478 {2, 3, 2}, /* cost of storing integer registers */
1479 2, /* cost of reg,reg fld/fst */
1480 {2, 2, 6}, /* cost of loading fp registers
1481 in SFmode, DFmode and XFmode */
1482 {4, 4, 6}, /* cost of storing fp registers
1483 in SFmode, DFmode and XFmode */
1484 2, /* cost of moving MMX register */
1485 {2, 2}, /* cost of loading MMX registers
1486 in SImode and DImode */
1487 {2, 2}, /* cost of storing MMX registers
1488 in SImode and DImode */
1489 12, /* cost of moving SSE register */
1490 {12, 12, 12}, /* cost of loading SSE registers
1491 in SImode, DImode and TImode */
1492 {2, 2, 8}, /* cost of storing SSE registers
1493 in SImode, DImode and TImode */
1494 10, /* MMX or SSE register to integer */
1495 8, /* size of l1 cache. */
1496 256, /* size of l2 cache. */
1497 64, /* size of prefetch block */
1498 6, /* number of parallel prefetches */
1499 2, /* Branch cost */
1500 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1501 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1502 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1503 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1504 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1505 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1506 pentium4_memcpy,
1507 pentium4_memset,
1508 1, /* scalar_stmt_cost. */
1509 1, /* scalar load_cost. */
1510 1, /* scalar_store_cost. */
1511 1, /* vec_stmt_cost. */
1512 1, /* vec_to_scalar_cost. */
1513 1, /* scalar_to_vec_cost. */
1514 1, /* vec_align_load_cost. */
1515 2, /* vec_unalign_load_cost. */
1516 1, /* vec_store_cost. */
1517 3, /* cond_taken_branch_cost. */
1518 1, /* cond_not_taken_branch_cost. */
1521 static stringop_algs nocona_memcpy[2] = {
1522 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1523 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1524 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1526 static stringop_algs nocona_memset[2] = {
1527 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1528 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1529 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1530 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1532 static const
1533 struct processor_costs nocona_cost = {
1534 COSTS_N_INSNS (1), /* cost of an add instruction */
1535 COSTS_N_INSNS (1), /* cost of a lea instruction */
1536 COSTS_N_INSNS (1), /* variable shift costs */
1537 COSTS_N_INSNS (1), /* constant shift costs */
1538 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1539 COSTS_N_INSNS (10), /* HI */
1540 COSTS_N_INSNS (10), /* SI */
1541 COSTS_N_INSNS (10), /* DI */
1542 COSTS_N_INSNS (10)}, /* other */
1543 0, /* cost of multiply per each bit set */
1544 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1545 COSTS_N_INSNS (66), /* HI */
1546 COSTS_N_INSNS (66), /* SI */
1547 COSTS_N_INSNS (66), /* DI */
1548 COSTS_N_INSNS (66)}, /* other */
1549 COSTS_N_INSNS (1), /* cost of movsx */
1550 COSTS_N_INSNS (1), /* cost of movzx */
1551 16, /* "large" insn */
1552 17, /* MOVE_RATIO */
1553 4, /* cost for loading QImode using movzbl */
1554 {4, 4, 4}, /* cost of loading integer registers
1555 in QImode, HImode and SImode.
1556 Relative to reg-reg move (2). */
1557 {4, 4, 4}, /* cost of storing integer registers */
1558 3, /* cost of reg,reg fld/fst */
1559 {12, 12, 12}, /* cost of loading fp registers
1560 in SFmode, DFmode and XFmode */
1561 {4, 4, 4}, /* cost of storing fp registers
1562 in SFmode, DFmode and XFmode */
1563 6, /* cost of moving MMX register */
1564 {12, 12}, /* cost of loading MMX registers
1565 in SImode and DImode */
1566 {12, 12}, /* cost of storing MMX registers
1567 in SImode and DImode */
1568 6, /* cost of moving SSE register */
1569 {12, 12, 12}, /* cost of loading SSE registers
1570 in SImode, DImode and TImode */
1571 {12, 12, 12}, /* cost of storing SSE registers
1572 in SImode, DImode and TImode */
1573 8, /* MMX or SSE register to integer */
1574 8, /* size of l1 cache. */
1575 1024, /* size of l2 cache. */
1576 64, /* size of prefetch block */
1577 8, /* number of parallel prefetches */
1578 1, /* Branch cost */
1579 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1580 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1581 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1582 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1583 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1584 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1585 nocona_memcpy,
1586 nocona_memset,
1587 1, /* scalar_stmt_cost. */
1588 1, /* scalar load_cost. */
1589 1, /* scalar_store_cost. */
1590 1, /* vec_stmt_cost. */
1591 1, /* vec_to_scalar_cost. */
1592 1, /* scalar_to_vec_cost. */
1593 1, /* vec_align_load_cost. */
1594 2, /* vec_unalign_load_cost. */
1595 1, /* vec_store_cost. */
1596 3, /* cond_taken_branch_cost. */
1597 1, /* cond_not_taken_branch_cost. */
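/* Cost tables for Bonnell-based Atom cores; processor_target_table below maps -mtune=bonnell to atom_cost. */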
1600 static stringop_algs atom_memcpy[2] = {
1601 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1602 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1603 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1604 static stringop_algs atom_memset[2] = {
1605 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1606 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1607 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1608 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1609 static const
1610 struct processor_costs atom_cost = {
1611 COSTS_N_INSNS (1), /* cost of an add instruction */
1612 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1613 COSTS_N_INSNS (1), /* variable shift costs */
1614 COSTS_N_INSNS (1), /* constant shift costs */
1615 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1616 COSTS_N_INSNS (4), /* HI */
1617 COSTS_N_INSNS (3), /* SI */
1618 COSTS_N_INSNS (4), /* DI */
1619 COSTS_N_INSNS (2)}, /* other */
1620 0, /* cost of multiply per each bit set */
1621 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1622 COSTS_N_INSNS (26), /* HI */
1623 COSTS_N_INSNS (42), /* SI */
1624 COSTS_N_INSNS (74), /* DI */
1625 COSTS_N_INSNS (74)}, /* other */
1626 COSTS_N_INSNS (1), /* cost of movsx */
1627 COSTS_N_INSNS (1), /* cost of movzx */
1628 8, /* "large" insn */
1629 17, /* MOVE_RATIO */
1630 4, /* cost for loading QImode using movzbl */
1631 {4, 4, 4}, /* cost of loading integer registers
1632 in QImode, HImode and SImode.
1633 Relative to reg-reg move (2). */
1634 {4, 4, 4}, /* cost of storing integer registers */
1635 4, /* cost of reg,reg fld/fst */
1636 {12, 12, 12}, /* cost of loading fp registers
1637 in SFmode, DFmode and XFmode */
1638 {6, 6, 8}, /* cost of storing fp registers
1639 in SFmode, DFmode and XFmode */
1640 2, /* cost of moving MMX register */
1641 {8, 8}, /* cost of loading MMX registers
1642 in SImode and DImode */
1643 {8, 8}, /* cost of storing MMX registers
1644 in SImode and DImode */
1645 2, /* cost of moving SSE register */
1646 {8, 8, 8}, /* cost of loading SSE registers
1647 in SImode, DImode and TImode */
1648 {8, 8, 8}, /* cost of storing SSE registers
1649 in SImode, DImode and TImode */
1650 5, /* MMX or SSE register to integer */
1651 32, /* size of l1 cache. */
1652 256, /* size of l2 cache. */
1653 64, /* size of prefetch block */
1654 6, /* number of parallel prefetches */
1655 3, /* Branch cost */
1656 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1657 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1658 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1659 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1660 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1661 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1662 atom_memcpy,
1663 atom_memset,
1664 1, /* scalar_stmt_cost. */
1665 1, /* scalar load_cost. */
1666 1, /* scalar_store_cost. */
1667 1, /* vec_stmt_cost. */
1668 1, /* vec_to_scalar_cost. */
1669 1, /* scalar_to_vec_cost. */
1670 1, /* vec_align_load_cost. */
1671 2, /* vec_unalign_load_cost. */
1672 1, /* vec_store_cost. */
1673 3, /* cond_taken_branch_cost. */
1674 1, /* cond_not_taken_branch_cost. */
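/* Cost tables for Silvermont; processor_target_table below maps -mtune=silvermont to slm_cost. */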
1677 static stringop_algs slm_memcpy[2] = {
1678 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1679 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1680 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1681 static stringop_algs slm_memset[2] = {
1682 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1683 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1684 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1685 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1686 static const
1687 struct processor_costs slm_cost = {
1688 COSTS_N_INSNS (1), /* cost of an add instruction */
1689 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1690 COSTS_N_INSNS (1), /* variable shift costs */
1691 COSTS_N_INSNS (1), /* constant shift costs */
1692 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1693 COSTS_N_INSNS (3), /* HI */
1694 COSTS_N_INSNS (3), /* SI */
1695 COSTS_N_INSNS (4), /* DI */
1696 COSTS_N_INSNS (2)}, /* other */
1697 0, /* cost of multiply per each bit set */
1698 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1699 COSTS_N_INSNS (26), /* HI */
1700 COSTS_N_INSNS (42), /* SI */
1701 COSTS_N_INSNS (74), /* DI */
1702 COSTS_N_INSNS (74)}, /* other */
1703 COSTS_N_INSNS (1), /* cost of movsx */
1704 COSTS_N_INSNS (1), /* cost of movzx */
1705 8, /* "large" insn */
1706 17, /* MOVE_RATIO */
1707 4, /* cost for loading QImode using movzbl */
1708 {4, 4, 4}, /* cost of loading integer registers
1709 in QImode, HImode and SImode.
1710 Relative to reg-reg move (2). */
1711 {4, 4, 4}, /* cost of storing integer registers */
1712 4, /* cost of reg,reg fld/fst */
1713 {12, 12, 12}, /* cost of loading fp registers
1714 in SFmode, DFmode and XFmode */
1715 {6, 6, 8}, /* cost of storing fp registers
1716 in SFmode, DFmode and XFmode */
1717 2, /* cost of moving MMX register */
1718 {8, 8}, /* cost of loading MMX registers
1719 in SImode and DImode */
1720 {8, 8}, /* cost of storing MMX registers
1721 in SImode and DImode */
1722 2, /* cost of moving SSE register */
1723 {8, 8, 8}, /* cost of loading SSE registers
1724 in SImode, DImode and TImode */
1725 {8, 8, 8}, /* cost of storing SSE registers
1726 in SImode, DImode and TImode */
1727 5, /* MMX or SSE register to integer */
1728 32, /* size of l1 cache. */
1729 256, /* size of l2 cache. */
1730 64, /* size of prefetch block */
1731 6, /* number of parallel prefetches */
1732 3, /* Branch cost */
1733 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1734 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1735 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1736 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1737 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1738 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1739 slm_memcpy,
1740 slm_memset,
1741 1, /* scalar_stmt_cost. */
1742 1, /* scalar load_cost. */
1743 1, /* scalar_store_cost. */
1744 1, /* vec_stmt_cost. */
1745 4, /* vec_to_scalar_cost. */
1746 1, /* scalar_to_vec_cost. */
1747 1, /* vec_align_load_cost. */
1748 2, /* vec_unalign_load_cost. */
1749 1, /* vec_store_cost. */
1750 3, /* cond_taken_branch_cost. */
1751 1, /* cond_not_taken_branch_cost. */
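/* Cost tables used for -mtune=intel (mapped to intel_cost in processor_target_table below). */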
1754 static stringop_algs intel_memcpy[2] = {
1755 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1756 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1757 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1758 static stringop_algs intel_memset[2] = {
1759 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1760 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1761 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1762 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1763 static const
1764 struct processor_costs intel_cost = {
1765 COSTS_N_INSNS (1), /* cost of an add instruction */
1766 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1767 COSTS_N_INSNS (1), /* variable shift costs */
1768 COSTS_N_INSNS (1), /* constant shift costs */
1769 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1770 COSTS_N_INSNS (3), /* HI */
1771 COSTS_N_INSNS (3), /* SI */
1772 COSTS_N_INSNS (4), /* DI */
1773 COSTS_N_INSNS (2)}, /* other */
1774 0, /* cost of multiply per each bit set */
1775 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1776 COSTS_N_INSNS (26), /* HI */
1777 COSTS_N_INSNS (42), /* SI */
1778 COSTS_N_INSNS (74), /* DI */
1779 COSTS_N_INSNS (74)}, /* other */
1780 COSTS_N_INSNS (1), /* cost of movsx */
1781 COSTS_N_INSNS (1), /* cost of movzx */
1782 8, /* "large" insn */
1783 17, /* MOVE_RATIO */
1784 4, /* cost for loading QImode using movzbl */
1785 {4, 4, 4}, /* cost of loading integer registers
1786 in QImode, HImode and SImode.
1787 Relative to reg-reg move (2). */
1788 {4, 4, 4}, /* cost of storing integer registers */
1789 4, /* cost of reg,reg fld/fst */
1790 {12, 12, 12}, /* cost of loading fp registers
1791 in SFmode, DFmode and XFmode */
1792 {6, 6, 8}, /* cost of storing fp registers
1793 in SFmode, DFmode and XFmode */
1794 2, /* cost of moving MMX register */
1795 {8, 8}, /* cost of loading MMX registers
1796 in SImode and DImode */
1797 {8, 8}, /* cost of storing MMX registers
1798 in SImode and DImode */
1799 2, /* cost of moving SSE register */
1800 {8, 8, 8}, /* cost of loading SSE registers
1801 in SImode, DImode and TImode */
1802 {8, 8, 8}, /* cost of storing SSE registers
1803 in SImode, DImode and TImode */
1804 5, /* MMX or SSE register to integer */
1805 32, /* size of l1 cache. */
1806 256, /* size of l2 cache. */
1807 64, /* size of prefetch block */
1808 6, /* number of parallel prefetches */
1809 3, /* Branch cost */
1810 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1811 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1812 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1813 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1814 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1815 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1816 intel_memcpy,
1817 intel_memset,
1818 1, /* scalar_stmt_cost. */
1819 1, /* scalar load_cost. */
1820 1, /* scalar_store_cost. */
1821 1, /* vec_stmt_cost. */
1822 4, /* vec_to_scalar_cost. */
1823 1, /* scalar_to_vec_cost. */
1824 1, /* vec_align_load_cost. */
1825 2, /* vec_unalign_load_cost. */
1826 1, /* vec_store_cost. */
1827 3, /* cond_taken_branch_cost. */
1828 1, /* cond_not_taken_branch_cost. */
1831 /* Generic should produce code tuned for Core-i7 (and newer chips)
1832 and btver1 (and newer chips). */
1834 static stringop_algs generic_memcpy[2] = {
1835 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1836 {-1, libcall, false}}},
1837 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1838 {-1, libcall, false}}}};
1839 static stringop_algs generic_memset[2] = {
1840 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1841 {-1, libcall, false}}},
1842 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1843 {-1, libcall, false}}}};
1844 static const
1845 struct processor_costs generic_cost = {
1846 COSTS_N_INSNS (1), /* cost of an add instruction */
1847 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1848 this cost, however, our current implementation of synth_mult results in
1849 the use of unnecessary temporary registers, causing regressions on several
1850 SPECfp benchmarks. */
1851 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1852 COSTS_N_INSNS (1), /* variable shift costs */
1853 COSTS_N_INSNS (1), /* constant shift costs */
1854 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1855 COSTS_N_INSNS (4), /* HI */
1856 COSTS_N_INSNS (3), /* SI */
1857 COSTS_N_INSNS (4), /* DI */
1858 COSTS_N_INSNS (2)}, /* other */
1859 0, /* cost of multiply per each bit set */
1860 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1861 COSTS_N_INSNS (26), /* HI */
1862 COSTS_N_INSNS (42), /* SI */
1863 COSTS_N_INSNS (74), /* DI */
1864 COSTS_N_INSNS (74)}, /* other */
1865 COSTS_N_INSNS (1), /* cost of movsx */
1866 COSTS_N_INSNS (1), /* cost of movzx */
1867 8, /* "large" insn */
1868 17, /* MOVE_RATIO */
1869 4, /* cost for loading QImode using movzbl */
1870 {4, 4, 4}, /* cost of loading integer registers
1871 in QImode, HImode and SImode.
1872 Relative to reg-reg move (2). */
1873 {4, 4, 4}, /* cost of storing integer registers */
1874 4, /* cost of reg,reg fld/fst */
1875 {12, 12, 12}, /* cost of loading fp registers
1876 in SFmode, DFmode and XFmode */
1877 {6, 6, 8}, /* cost of storing fp registers
1878 in SFmode, DFmode and XFmode */
1879 2, /* cost of moving MMX register */
1880 {8, 8}, /* cost of loading MMX registers
1881 in SImode and DImode */
1882 {8, 8}, /* cost of storing MMX registers
1883 in SImode and DImode */
1884 2, /* cost of moving SSE register */
1885 {8, 8, 8}, /* cost of loading SSE registers
1886 in SImode, DImode and TImode */
1887 {8, 8, 8}, /* cost of storing SSE registers
1888 in SImode, DImode and TImode */
1889 5, /* MMX or SSE register to integer */
1890 32, /* size of l1 cache. */
1891 512, /* size of l2 cache. */
1892 64, /* size of prefetch block */
1893 6, /* number of parallel prefetches */
1894 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1895 value is increased to the perhaps more appropriate value of 5. */
1896 3, /* Branch cost */
1897 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1898 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1899 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1900 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1901 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1902 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1903 generic_memcpy,
1904 generic_memset,
1905 1, /* scalar_stmt_cost. */
1906 1, /* scalar load_cost. */
1907 1, /* scalar_store_cost. */
1908 1, /* vec_stmt_cost. */
1909 1, /* vec_to_scalar_cost. */
1910 1, /* scalar_to_vec_cost. */
1911 1, /* vec_align_load_cost. */
1912 2, /* vec_unalign_load_cost. */
1913 1, /* vec_store_cost. */
1914 3, /* cond_taken_branch_cost. */
1915 1, /* cond_not_taken_branch_cost. */
1918 /* core_cost should produce code tuned for the Core family of CPUs. */
1919 static stringop_algs core_memcpy[2] = {
1920 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1921 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1922 {-1, libcall, false}}}};
1923 static stringop_algs core_memset[2] = {
1924 {libcall, {{6, loop_1_byte, true},
1925 {24, loop, true},
1926 {8192, rep_prefix_4_byte, true},
1927 {-1, libcall, false}}},
1928 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1929 {-1, libcall, false}}}};
1931 static const
1932 struct processor_costs core_cost = {
1933 COSTS_N_INSNS (1), /* cost of an add instruction */
1934 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1935 this cost, however, our current implementation of synth_mult results in
1936 the use of unnecessary temporary registers, causing regressions on several
1937 SPECfp benchmarks. */
1938 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1939 COSTS_N_INSNS (1), /* variable shift costs */
1940 COSTS_N_INSNS (1), /* constant shift costs */
1941 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1942 COSTS_N_INSNS (4), /* HI */
1943 COSTS_N_INSNS (3), /* SI */
1944 COSTS_N_INSNS (4), /* DI */
1945 COSTS_N_INSNS (2)}, /* other */
1946 0, /* cost of multiply per each bit set */
1947 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1948 COSTS_N_INSNS (26), /* HI */
1949 COSTS_N_INSNS (42), /* SI */
1950 COSTS_N_INSNS (74), /* DI */
1951 COSTS_N_INSNS (74)}, /* other */
1952 COSTS_N_INSNS (1), /* cost of movsx */
1953 COSTS_N_INSNS (1), /* cost of movzx */
1954 8, /* "large" insn */
1955 17, /* MOVE_RATIO */
1956 4, /* cost for loading QImode using movzbl */
1957 {4, 4, 4}, /* cost of loading integer registers
1958 in QImode, HImode and SImode.
1959 Relative to reg-reg move (2). */
1960 {4, 4, 4}, /* cost of storing integer registers */
1961 4, /* cost of reg,reg fld/fst */
1962 {12, 12, 12}, /* cost of loading fp registers
1963 in SFmode, DFmode and XFmode */
1964 {6, 6, 8}, /* cost of storing fp registers
1965 in SFmode, DFmode and XFmode */
1966 2, /* cost of moving MMX register */
1967 {8, 8}, /* cost of loading MMX registers
1968 in SImode and DImode */
1969 {8, 8}, /* cost of storing MMX registers
1970 in SImode and DImode */
1971 2, /* cost of moving SSE register */
1972 {8, 8, 8}, /* cost of loading SSE registers
1973 in SImode, DImode and TImode */
1974 {8, 8, 8}, /* cost of storing SSE registers
1975 in SImode, DImode and TImode */
1976 5, /* MMX or SSE register to integer */
1977 64, /* size of l1 cache. */
1978 512, /* size of l2 cache. */
1979 64, /* size of prefetch block */
1980 6, /* number of parallel prefetches */
1981 /* FIXME perhaps more appropriate value is 5. */
1982 3, /* Branch cost */
1983 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1984 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1985 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1986 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1987 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1988 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1989 core_memcpy,
1990 core_memset,
1991 1, /* scalar_stmt_cost. */
1992 1, /* scalar load_cost. */
1993 1, /* scalar_store_cost. */
1994 1, /* vec_stmt_cost. */
1995 1, /* vec_to_scalar_cost. */
1996 1, /* scalar_to_vec_cost. */
1997 1, /* vec_align_load_cost. */
1998 2, /* vec_unalign_load_cost. */
1999 1, /* vec_store_cost. */
2000 3, /* cond_taken_branch_cost. */
2001 1, /* cond_not_taken_branch_cost. */
2005 /* Set by -mtune. */
2006 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2008 /* Set by -mtune or -Os. */
2009 const struct processor_costs *ix86_cost = &pentium_cost;
2011 /* Processor feature/optimization bitmasks. */
2012 #define m_386 (1<<PROCESSOR_I386)
2013 #define m_486 (1<<PROCESSOR_I486)
2014 #define m_PENT (1<<PROCESSOR_PENTIUM)
2015 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2016 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2017 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2018 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2019 #define m_CORE2 (1<<PROCESSOR_CORE2)
2020 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2021 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2022 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2023 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2024 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2025 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2026 #define m_INTEL (1<<PROCESSOR_INTEL)
2028 #define m_GEODE (1<<PROCESSOR_GEODE)
2029 #define m_K6 (1<<PROCESSOR_K6)
2030 #define m_K6_GEODE (m_K6 | m_GEODE)
2031 #define m_K8 (1<<PROCESSOR_K8)
2032 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2033 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2034 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2035 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2036 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2037 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2038 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2039 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2040 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2041 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2042 #define m_BTVER (m_BTVER1 | m_BTVER2)
2043 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2045 #define m_GENERIC (1<<PROCESSOR_GENERIC)
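/* ix86_tune_feature_names and initial_ix86_tune_features below are both generated from x86-tune.def: DEF_TUNE is redefined before each include so that the same table expands once to the feature names and once to the processor-mask selectors. */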
2047 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2048 #undef DEF_TUNE
2049 #define DEF_TUNE(tune, name, selector) name,
2050 #include "x86-tune.def"
2051 #undef DEF_TUNE
2054 /* Feature tests against the various tunings. */
2055 unsigned char ix86_tune_features[X86_TUNE_LAST];
2057 /* Feature tests against the various tunings used to create ix86_tune_features
2058 based on the processor mask. */
2059 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2060 #undef DEF_TUNE
2061 #define DEF_TUNE(tune, name, selector) selector,
2062 #include "x86-tune.def"
2063 #undef DEF_TUNE
2066 /* Feature tests against the various architecture variations. */
2067 unsigned char ix86_arch_features[X86_ARCH_LAST];
2069 /* Feature tests against the various architecture variations, used to create
2070 ix86_arch_features based on the processor mask. */
2071 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2072 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2073 ~(m_386 | m_486 | m_PENT | m_K6),
2075 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2076 ~m_386,
2078 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2079 ~(m_386 | m_486),
2081 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2082 ~m_386,
2084 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2085 ~m_386,
2088 /* In case the average insn count for a single function invocation is
2089 lower than this constant, emit fast (but longer) prologue and
2090 epilogue code. */
2091 #define FAST_PROLOGUE_INSN_COUNT 20
2093 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2094 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2095 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2096 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2098 /* Array of the smallest class containing reg number REGNO, indexed by
2099 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2101 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2103 /* ax, dx, cx, bx */
2104 AREG, DREG, CREG, BREG,
2105 /* si, di, bp, sp */
2106 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2107 /* FP registers */
2108 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2109 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2110 /* arg pointer */
2111 NON_Q_REGS,
2112 /* flags, fpsr, fpcr, frame */
2113 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2114 /* SSE registers */
2115 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2116 SSE_REGS, SSE_REGS,
2117 /* MMX registers */
2118 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2119 MMX_REGS, MMX_REGS,
2120 /* REX registers */
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2123 /* SSE REX registers */
2124 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2125 SSE_REGS, SSE_REGS,
2126 /* AVX-512 SSE registers */
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2131 /* Mask registers. */
2132 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2136 /* The "default" register map used in 32bit mode. */
2138 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2140 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2141 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2142 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2143 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2144 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23 */
2148 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31 */
2149 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2152 /* The "default" register map used in 64bit mode. */
2154 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2156 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2157 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2158 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2159 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2160 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2161 8,9,10,11,12,13,14,15, /* extended integer registers */
2162 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2163 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2164 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2165 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2168 /* Define the register numbers to be used in Dwarf debugging information.
2169 The SVR4 reference port C compiler uses the following register numbers
2170 in its Dwarf output code:
2171 0 for %eax (gcc regno = 0)
2172 1 for %ecx (gcc regno = 2)
2173 2 for %edx (gcc regno = 1)
2174 3 for %ebx (gcc regno = 3)
2175 4 for %esp (gcc regno = 7)
2176 5 for %ebp (gcc regno = 6)
2177 6 for %esi (gcc regno = 4)
2178 7 for %edi (gcc regno = 5)
2179 The following three DWARF register numbers are never generated by
2180 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2181 believes these numbers have these meanings.
2182 8 for %eip (no gcc equivalent)
2183 9 for %eflags (gcc regno = 17)
2184 10 for %trapno (no gcc equivalent)
2185 It is not at all clear how we should number the FP stack registers
2186 for the x86 architecture. If the version of SDB on x86/svr4 were
2187 a bit less brain dead with respect to floating-point then we would
2188 have a precedent to follow with respect to DWARF register numbers
2189 for x86 FP registers, but the SDB on x86/svr4 is so completely
2190 broken with respect to FP registers that it is hardly worth thinking
2191 of it as something to strive for compatibility with.
2192 The version of x86/svr4 SDB I have at the moment does (partially)
2193 seem to believe that DWARF register number 11 is associated with
2194 the x86 register %st(0), but that's about all. Higher DWARF
2195 register numbers don't seem to be associated with anything in
2196 particular, and even for DWARF regno 11, SDB only seems to under-
2197 stand that it should say that a variable lives in %st(0) (when
2198 asked via an `=' command) if we said it was in DWARF regno 11,
2199 but SDB still prints garbage when asked for the value of the
2200 variable in question (via a `/' command).
2201 (Also note that the labels SDB prints for various FP stack regs
2202 when doing an `x' command are all wrong.)
2203 Note that these problems generally don't affect the native SVR4
2204 C compiler because it doesn't allow the use of -O with -g and
2205 because when it is *not* optimizing, it allocates a memory
2206 location for each floating-point variable, and the memory
2207 location is what gets described in the DWARF AT_location
2208 attribute for the variable in question.
2209 Regardless of the severe mental illness of the x86/svr4 SDB, we
2210 do something sensible here and we use the following DWARF
2211 register numbers. Note that these are all stack-top-relative
2212 numbers.
2213 11 for %st(0) (gcc regno = 8)
2214 12 for %st(1) (gcc regno = 9)
2215 13 for %st(2) (gcc regno = 10)
2216 14 for %st(3) (gcc regno = 11)
2217 15 for %st(4) (gcc regno = 12)
2218 16 for %st(5) (gcc regno = 13)
2219 17 for %st(6) (gcc regno = 14)
2220 18 for %st(7) (gcc regno = 15)
2222 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2224 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2225 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2226 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2227 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2228 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23 */
2232 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31 */
2233 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2236 /* Define parameter passing and return registers. */
2238 static int const x86_64_int_parameter_registers[6] =
2240 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2243 static int const x86_64_ms_abi_int_parameter_registers[4] =
2245 CX_REG, DX_REG, R8_REG, R9_REG
2248 static int const x86_64_int_return_registers[4] =
2250 AX_REG, DX_REG, DI_REG, SI_REG
2253 /* Additional registers that are clobbered by SYSV calls. */
2255 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2257 SI_REG, DI_REG,
2258 XMM6_REG, XMM7_REG,
2259 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2260 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
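/* rsi, rdi and xmm6-xmm15 are call-saved in the MS ABI but call-clobbered in the SysV ABI, so MS-ABI code must treat them as clobbered across a SysV call. */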
2263 /* Define the structure for the machine field in struct function. */
2265 struct GTY(()) stack_local_entry {
2266 unsigned short mode;
2267 unsigned short n;
2268 rtx rtl;
2269 struct stack_local_entry *next;
2272 /* Structure describing stack frame layout.
2273 Stack grows downward:
2275 [arguments]
2276 <- ARG_POINTER
2277 saved pc
2279 saved static chain if ix86_static_chain_on_stack
2281 saved frame pointer if frame_pointer_needed
2282 <- HARD_FRAME_POINTER
2283 [saved regs]
2284 <- regs_save_offset
2285 [padding0]
2287 [saved SSE regs]
2288 <- sse_regs_save_offset
2289 [padding1] |
2290 | <- FRAME_POINTER
2291 [va_arg registers] |
2293 [frame] |
2295 [padding2] | = to_allocate
2296 <- STACK_POINTER
2298 struct ix86_frame
2300 int nsseregs;
2301 int nregs;
2302 int va_arg_size;
2303 int red_zone_size;
2304 int outgoing_arguments_size;
2306 /* The offsets relative to ARG_POINTER. */
2307 HOST_WIDE_INT frame_pointer_offset;
2308 HOST_WIDE_INT hard_frame_pointer_offset;
2309 HOST_WIDE_INT stack_pointer_offset;
2310 HOST_WIDE_INT hfp_save_offset;
2311 HOST_WIDE_INT reg_save_offset;
2312 HOST_WIDE_INT sse_reg_save_offset;
2314 /* When save_regs_using_mov is set, emit prologue using
2315 move instead of push instructions. */
2316 bool save_regs_using_mov;
2319 /* Which cpu are we scheduling for. */
2320 enum attr_cpu ix86_schedule;
2322 /* Which cpu are we optimizing for. */
2323 enum processor_type ix86_tune;
2325 /* Which instruction set architecture to use. */
2326 enum processor_type ix86_arch;
2328 /* True if processor has SSE prefetch instruction. */
2329 unsigned char x86_prefetch_sse;
2331 /* -mstackrealign option */
2332 static const char ix86_force_align_arg_pointer_string[]
2333 = "force_align_arg_pointer";
2335 static rtx (*ix86_gen_leave) (void);
2336 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2338 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2339 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2340 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2342 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2343 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2346 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2348 /* Preferred alignment for stack boundary in bits. */
2349 unsigned int ix86_preferred_stack_boundary;
2351 /* Alignment for incoming stack boundary in bits specified at
2352 command line. */
2353 static unsigned int ix86_user_incoming_stack_boundary;
2355 /* Default alignment for incoming stack boundary in bits. */
2356 static unsigned int ix86_default_incoming_stack_boundary;
2358 /* Alignment for incoming stack boundary in bits. */
2359 unsigned int ix86_incoming_stack_boundary;
2361 /* Calling abi specific va_list type nodes. */
2362 static GTY(()) tree sysv_va_list_type_node;
2363 static GTY(()) tree ms_va_list_type_node;
2365 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2366 char internal_label_prefix[16];
2367 int internal_label_prefix_len;
2369 /* Fence to use after loop using movnt. */
2370 tree x86_mfence;
2372 /* Register class used for passing a given 64-bit part of the argument.
2373 These represent classes as documented by the psABI, with the exception
2374 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
2375 just uses an SFmode or DFmode move instead of DImode to avoid
2377 reformatting penalties. Similarly we play games with INTEGERSI_CLASS
2378 to use cheaper SImode moves whenever possible (the upper half then holds only padding). */
2379 enum x86_64_reg_class
2381 X86_64_NO_CLASS,
2382 X86_64_INTEGER_CLASS,
2383 X86_64_INTEGERSI_CLASS,
2384 X86_64_SSE_CLASS,
2385 X86_64_SSESF_CLASS,
2386 X86_64_SSEDF_CLASS,
2387 X86_64_SSEUP_CLASS,
2388 X86_64_X87_CLASS,
2389 X86_64_X87UP_CLASS,
2390 X86_64_COMPLEX_X87_CLASS,
2391 X86_64_MEMORY_CLASS
2394 #define MAX_CLASSES 8
2396 /* Table of constants used by fldpi, fldln2, etc.... */
2397 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2398 static bool ext_80387_constants_init = 0;
2401 static struct machine_function * ix86_init_machine_status (void);
2402 static rtx ix86_function_value (const_tree, const_tree, bool);
2403 static bool ix86_function_value_regno_p (const unsigned int);
2404 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2405 const_tree);
2406 static rtx ix86_static_chain (const_tree, bool);
2407 static int ix86_function_regparm (const_tree, const_tree);
2408 static void ix86_compute_frame_layout (struct ix86_frame *);
2409 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2410 rtx, rtx, int);
2411 static void ix86_add_new_builtins (HOST_WIDE_INT);
2412 static tree ix86_canonical_va_list_type (tree);
2413 static void predict_jump (int);
2414 static unsigned int split_stack_prologue_scratch_regno (void);
2415 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2417 enum ix86_function_specific_strings
2419 IX86_FUNCTION_SPECIFIC_ARCH,
2420 IX86_FUNCTION_SPECIFIC_TUNE,
2421 IX86_FUNCTION_SPECIFIC_MAX
2424 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2425 const char *, enum fpmath_unit, bool);
2426 static void ix86_function_specific_save (struct cl_target_option *,
2427 struct gcc_options *opts);
2428 static void ix86_function_specific_restore (struct gcc_options *opts,
2429 struct cl_target_option *);
2430 static void ix86_function_specific_print (FILE *, int,
2431 struct cl_target_option *);
2432 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2433 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2434 struct gcc_options *,
2435 struct gcc_options *,
2436 struct gcc_options *);
2437 static bool ix86_can_inline_p (tree, tree);
2438 static void ix86_set_current_function (tree);
2439 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2441 static enum calling_abi ix86_function_abi (const_tree);
2444 #ifndef SUBTARGET32_DEFAULT_CPU
2445 #define SUBTARGET32_DEFAULT_CPU "i386"
2446 #endif
2448 /* Whether -mtune= or -march= were specified */
2449 static int ix86_tune_defaulted;
2450 static int ix86_arch_specified;
2452 /* Vectorization library interface and handlers. */
2453 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2456 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2458 /* Processor target table, indexed by processor number */
2459 struct ptt
2461 const char *const name; /* processor name */
2462 const struct processor_costs *cost; /* Processor costs */
2463 const int align_loop; /* Default alignments. */
2464 const int align_loop_max_skip;
2465 const int align_jump;
2466 const int align_jump_max_skip;
2467 const int align_func;
2470 /* This table must be in sync with enum processor_type in i386.h. */
2471 static const struct ptt processor_target_table[PROCESSOR_max] =
2473 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2474 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2475 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2476 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2477 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2478 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2479 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2480 {"core2", &core_cost, 16, 10, 16, 10, 16},
2481 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2482 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2483 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2484 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2485 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2486 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2487 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2488 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2489 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2490 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2491 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2492 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2493 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2494 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2495 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2496 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2497 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2500 static unsigned int
2501 rest_of_handle_insert_vzeroupper (void)
2503 int i;
2505 /* vzeroupper instructions are inserted immediately after reload to
2506 account for possible spills from 256-bit registers. The pass
2507 reuses the mode switching infrastructure by re-running the mode
2508 insertion pass, so disable entities that have already been processed. */
2509 for (i = 0; i < MAX_386_ENTITIES; i++)
2510 ix86_optimize_mode_switching[i] = 0;
2512 ix86_optimize_mode_switching[AVX_U128] = 1;
2514 /* Call optimize_mode_switching. */
2515 g->get_passes ()->execute_pass_mode_switching ();
2516 return 0;
2519 namespace {
2521 const pass_data pass_data_insert_vzeroupper =
2523 RTL_PASS, /* type */
2524 "vzeroupper", /* name */
2525 OPTGROUP_NONE, /* optinfo_flags */
2526 TV_NONE, /* tv_id */
2527 0, /* properties_required */
2528 0, /* properties_provided */
2529 0, /* properties_destroyed */
2530 0, /* todo_flags_start */
2531 TODO_df_finish, /* todo_flags_finish */
2534 class pass_insert_vzeroupper : public rtl_opt_pass
2536 public:
2537 pass_insert_vzeroupper(gcc::context *ctxt)
2538 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2541 /* opt_pass methods: */
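/* The pass runs only when -mvzeroupper is enabled on an AVX target without AVX512F. */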
2542 virtual bool gate (function *)
2544 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2547 virtual unsigned int execute (function *)
2549 return rest_of_handle_insert_vzeroupper ();
2552 }; // class pass_insert_vzeroupper
2554 } // anon namespace
2556 rtl_opt_pass *
2557 make_pass_insert_vzeroupper (gcc::context *ctxt)
2559 return new pass_insert_vzeroupper (ctxt);
2562 /* Return true if a red-zone is in use. */
2564 static inline bool
2565 ix86_using_red_zone (void)
2567 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2570 /* Return a string that documents the current -m options. The caller is
2571 responsible for freeing the string. */
2573 static char *
2574 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2575 const char *tune, enum fpmath_unit fpmath,
2576 bool add_nl_p)
2578 struct ix86_target_opts
2580 const char *option; /* option string */
2581 HOST_WIDE_INT mask; /* isa mask options */
2584 /* This table is ordered so that options like -msse4.2 that imply
2585 preceding options are matched first. */
2586 static struct ix86_target_opts isa_opts[] =
2588 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2589 { "-mfma", OPTION_MASK_ISA_FMA },
2590 { "-mxop", OPTION_MASK_ISA_XOP },
2591 { "-mlwp", OPTION_MASK_ISA_LWP },
2592 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2593 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2594 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2595 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2596 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2597 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2598 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2599 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2600 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2601 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2602 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2603 { "-msse3", OPTION_MASK_ISA_SSE3 },
2604 { "-msse2", OPTION_MASK_ISA_SSE2 },
2605 { "-msse", OPTION_MASK_ISA_SSE },
2606 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2607 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2608 { "-mmmx", OPTION_MASK_ISA_MMX },
2609 { "-mabm", OPTION_MASK_ISA_ABM },
2610 { "-mbmi", OPTION_MASK_ISA_BMI },
2611 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2612 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2613 { "-mhle", OPTION_MASK_ISA_HLE },
2614 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2615 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2616 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2617 { "-madx", OPTION_MASK_ISA_ADX },
2618 { "-mtbm", OPTION_MASK_ISA_TBM },
2619 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2620 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2621 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2622 { "-maes", OPTION_MASK_ISA_AES },
2623 { "-msha", OPTION_MASK_ISA_SHA },
2624 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2625 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2626 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2627 { "-mf16c", OPTION_MASK_ISA_F16C },
2628 { "-mrtm", OPTION_MASK_ISA_RTM },
2629 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2630 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2631 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2632 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2633 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2634 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2637 /* Flag options. */
2638 static struct ix86_target_opts flag_opts[] =
2640 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2641 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2642 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2643 { "-m80387", MASK_80387 },
2644 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2645 { "-malign-double", MASK_ALIGN_DOUBLE },
2646 { "-mcld", MASK_CLD },
2647 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2648 { "-mieee-fp", MASK_IEEE_FP },
2649 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2650 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2651 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2652 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2653 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2654 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2655 { "-mno-red-zone", MASK_NO_RED_ZONE },
2656 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2657 { "-mrecip", MASK_RECIP },
2658 { "-mrtd", MASK_RTD },
2659 { "-msseregparm", MASK_SSEREGPARM },
2660 { "-mstack-arg-probe", MASK_STACK_PROBE },
2661 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2662 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2663 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2664 { "-mvzeroupper", MASK_VZEROUPPER },
2665 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2666 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2667 { "-mprefer-avx128", MASK_PREFER_AVX128},
2670 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2672 char isa_other[40];
2673 char target_other[40];
2674 unsigned num = 0;
2675 unsigned i, j;
2676 char *ret;
2677 char *ptr;
2678 size_t len;
2679 size_t line_len;
2680 size_t sep_len;
2681 const char *abi;
2683 memset (opts, '\0', sizeof (opts));
2685 /* Add -march= option. */
2686 if (arch)
2688 opts[num][0] = "-march=";
2689 opts[num++][1] = arch;
2692 /* Add -mtune= option. */
2693 if (tune)
2695 opts[num][0] = "-mtune=";
2696 opts[num++][1] = tune;
2699 /* Add -m32/-m64/-mx32. */
2700 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2702 if ((isa & OPTION_MASK_ABI_64) != 0)
2703 abi = "-m64";
2704 else
2705 abi = "-mx32";
2706 isa &= ~ (OPTION_MASK_ISA_64BIT
2707 | OPTION_MASK_ABI_64
2708 | OPTION_MASK_ABI_X32);
2710 else
2711 abi = "-m32";
2712 opts[num++][0] = abi;
2714 /* Pick out the ISA options. */
2715 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2717 if ((isa & isa_opts[i].mask) != 0)
2719 opts[num++][0] = isa_opts[i].option;
2720 isa &= ~ isa_opts[i].mask;
2724 if (isa && add_nl_p)
2726 opts[num++][0] = isa_other;
2727 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2728 isa);
2731 /* Add flag options. */
2732 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2734 if ((flags & flag_opts[i].mask) != 0)
2736 opts[num++][0] = flag_opts[i].option;
2737 flags &= ~ flag_opts[i].mask;
2741 if (flags && add_nl_p)
2743 opts[num++][0] = target_other;
2744 sprintf (target_other, "(other flags: %#x)", flags);
2747 /* Add -fpmath= option. */
2748 if (fpmath)
2750 opts[num][0] = "-mfpmath=";
2751 switch ((int) fpmath)
2753 case FPMATH_387:
2754 opts[num++][1] = "387";
2755 break;
2757 case FPMATH_SSE:
2758 opts[num++][1] = "sse";
2759 break;
2761 case FPMATH_387 | FPMATH_SSE:
2762 opts[num++][1] = "sse+387";
2763 break;
2765 default:
2766 gcc_unreachable ();
2770 /* Any options? */
2771 if (num == 0)
2772 return NULL;
2774 gcc_assert (num < ARRAY_SIZE (opts));
2776 /* Size the string. */
2777 len = 0;
2778 sep_len = (add_nl_p) ? 3 : 1;
2779 for (i = 0; i < num; i++)
2781 len += sep_len;
2782 for (j = 0; j < 2; j++)
2783 if (opts[i][j])
2784 len += strlen (opts[i][j]);
2787 /* Build the string. */
2788 ret = ptr = (char *) xmalloc (len);
2789 line_len = 0;
2791 for (i = 0; i < num; i++)
2793 size_t len2[2];
2795 for (j = 0; j < 2; j++)
2796 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2798 if (i != 0)
2800 *ptr++ = ' ';
2801 line_len++;
2803 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2805 *ptr++ = '\\';
2806 *ptr++ = '\n';
2807 line_len = 0;
2811 for (j = 0; j < 2; j++)
2812 if (opts[i][j])
2814 memcpy (ptr, opts[i][j], len2[j]);
2815 ptr += len2[j];
2816 line_len += len2[j];
2820 *ptr = '\0';
2821 gcc_assert (ret + len >= ptr);
2823 return ret;
2826 /* Return true if profiling code should be emitted before the
2827 prologue, otherwise false.
2828 Note: for x86, combining this with the "hotfix" (hot-patch prologue) case is sorried, i.e. rejected as unimplemented. */
2829 static bool
2830 ix86_profile_before_prologue (void)
2832 return flag_fentry != 0;
2835 /* Function that is callable from the debugger to print the current
2836 options. */
2837 void ATTRIBUTE_UNUSED
2838 ix86_debug_options (void)
2840 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2841 ix86_arch_string, ix86_tune_string,
2842 ix86_fpmath, true);
2844 if (opts)
2846 fprintf (stderr, "%s\n\n", opts);
2847 free (opts);
2849 else
2850 fputs ("<no options>\n\n", stderr);
2852 return;
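/* Names of the string-operation algorithms, generated from stringop.def and indexed by enum stringop_alg; used below when parsing -mmemcpy-strategy= and -mmemset-strategy=. */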
2855 static const char *stringop_alg_names[] = {
2856 #define DEF_ENUM
2857 #define DEF_ALG(alg, name) #name,
2858 #include "stringop.def"
2859 #undef DEF_ENUM
2860 #undef DEF_ALG
2863 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2864 The string is of the following form (or a comma-separated list of such entries):
2866 strategy_alg:max_size:[align|noalign]
2868 where the full size range for the strategy is either [0, max_size] or
2869 [min_size, max_size], in which min_size is the max_size + 1 of the
2870 preceding range. The last size range must have max_size == -1.
2872 Examples:
2875 -mmemcpy-strategy=libcall:-1:noalign
2877 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2881 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2883 This is to tell the compiler to use the following strategy for memset
2884 1) when the expected size is between [1, 16], use rep_8byte strategy;
2885 2) when the size is between [17, 2048], use vector_loop;
2886 3) when the size is > 2048, use libcall. */
2888 struct stringop_size_range
2890 int max;
2891 stringop_alg alg;
2892 bool noalign;
2895 static void
2896 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2898 const struct stringop_algs *default_algs;
2899 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2900 char *curr_range_str, *next_range_str;
2901 int i = 0, n = 0;
2903 if (is_memset)
2904 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2905 else
2906 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2908 curr_range_str = strategy_str;
2912 int maxs;
2913 char alg_name[128];
2914 char align[16];
2915 next_range_str = strchr (curr_range_str, ',');
2916 if (next_range_str)
2917 *next_range_str++ = '\0';
2919 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2920 alg_name, &maxs, align))
2922 error ("wrong arg %s to option %s", curr_range_str,
2923 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2924 return;
2927 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2929 error ("size ranges of option %s should be increasing",
2930 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2931 return;
2934 for (i = 0; i < last_alg; i++)
2935 if (!strcmp (alg_name, stringop_alg_names[i]))
2936 break;
2938 if (i == last_alg)
2940 error ("wrong stringop strategy name %s specified for option %s",
2941 alg_name,
2942 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2943 return;
2946 input_ranges[n].max = maxs;
2947 input_ranges[n].alg = (stringop_alg) i;
2948 if (!strcmp (align, "align"))
2949 input_ranges[n].noalign = false;
2950 else if (!strcmp (align, "noalign"))
2951 input_ranges[n].noalign = true;
2952 else
2954 error ("unknown alignment %s specified for option %s",
2955 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2956 return;
2958 n++;
2959 curr_range_str = next_range_str;
2961 while (curr_range_str);
2963 if (input_ranges[n - 1].max != -1)
2965 error ("the max value for the last size range should be -1"
2966 " for option %s",
2967 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2968 return;
2971 if (n > MAX_STRINGOP_ALGS)
2973 error ("too many size ranges specified in option %s",
2974 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2975 return;
2978 /* Now override the default algs array. */
2979 for (i = 0; i < n; i++)
2981 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2982 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2983 = input_ranges[i].alg;
2984 *const_cast<int *>(&default_algs->size[i].noalign)
2985 = input_ranges[i].noalign;
2990 /* Parse the -mtune-ctrl= option. When DUMP is true,
2991 print the features that are explicitly set. */
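/* The option value is a comma-separated list of tuning feature names; a name prefixed with '^' clears the feature instead of setting it. */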
2993 static void
2994 parse_mtune_ctrl_str (bool dump)
2996 if (!ix86_tune_ctrl_string)
2997 return;
2999 char *next_feature_string = NULL;
3000 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3001 char *orig = curr_feature_string;
3002 int i;
3005 bool clear = false;
3007 next_feature_string = strchr (curr_feature_string, ',');
3008 if (next_feature_string)
3009 *next_feature_string++ = '\0';
3010 if (*curr_feature_string == '^')
3012 curr_feature_string++;
3013 clear = true;
3015 for (i = 0; i < X86_TUNE_LAST; i++)
3017 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3019 ix86_tune_features[i] = !clear;
3020 if (dump)
3021 fprintf (stderr, "Explicitly %s feature %s\n",
3022 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3023 break;
3026 if (i == X86_TUNE_LAST)
3027 error ("Unknown parameter to option -mtune-ctrl: %s",
3028 clear ? curr_feature_string - 1 : curr_feature_string);
3029 curr_feature_string = next_feature_string;
3031 while (curr_feature_string);
3032 free (orig);
3035 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3036 processor type. */
3038 static void
3039 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3041 unsigned int ix86_tune_mask = 1u << ix86_tune;
3042 int i;
3044 for (i = 0; i < X86_TUNE_LAST; ++i)
3046 if (ix86_tune_no_default)
3047 ix86_tune_features[i] = 0;
3048 else
3049 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3052 if (dump)
3054 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3055 for (i = 0; i < X86_TUNE_LAST; i++)
3056 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3057 ix86_tune_features[i] ? "on" : "off");
3060 parse_mtune_ctrl_str (dump);
3064 /* Override various settings based on options. If MAIN_ARGS_P, the
3065 options are from the command line, otherwise they are from
3066 attributes. */
3068 static void
3069 ix86_option_override_internal (bool main_args_p,
3070 struct gcc_options *opts,
3071 struct gcc_options *opts_set)
3073 int i;
3074 unsigned int ix86_arch_mask;
3075 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3076 const char *prefix;
3077 const char *suffix;
3078 const char *sw;
3080 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3081 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3082 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3083 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3084 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3085 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3086 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3087 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3088 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3089 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3090 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3091 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3092 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3093 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3094 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3095 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3096 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3097 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3098 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3099 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3100 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3101 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3102 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3103 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3104 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3105 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3106 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3107 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3108 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3109 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3110 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3111 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3112 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3113 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3114 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3115 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3116 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3117 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3118 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3119 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3120 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3121 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3122 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3123 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3124 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3125 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3126 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3127 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3128 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3129 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3130 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3131 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
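/* Cumulative ISA feature sets for the processor alias table below; each
generation extends its predecessor.  */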
3133 #define PTA_CORE2 \
3134 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3135 | PTA_CX16 | PTA_FXSR)
3136 #define PTA_NEHALEM \
3137 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3138 #define PTA_WESTMERE \
3139 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3140 #define PTA_SANDYBRIDGE \
3141 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3142 #define PTA_IVYBRIDGE \
3143 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3144 #define PTA_HASWELL \
3145 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3146 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3147 #define PTA_BROADWELL \
3148 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3149 #define PTA_BONNELL \
3150 (PTA_CORE2 | PTA_MOVBE)
3151 #define PTA_SILVERMONT \
3152 (PTA_WESTMERE | PTA_MOVBE)
3154 /* If this reaches 64, we need to widen the struct pta flags field below. */
3156 static struct pta
3158 const char *const name; /* processor name or nickname. */
3159 const enum processor_type processor;
3160 const enum attr_cpu schedule;
3161 const unsigned HOST_WIDE_INT flags;
3163 const processor_alias_table[] =
3165 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3166 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3167 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3168 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3169 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3170 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3171 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3172 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3173 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3174 PTA_MMX | PTA_SSE | PTA_FXSR},
3175 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3176 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3177 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3178 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3179 PTA_MMX | PTA_SSE | PTA_FXSR},
3180 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3181 PTA_MMX | PTA_SSE | PTA_FXSR},
3182 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3183 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3184 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3185 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3186 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3187 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3188 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3189 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3190 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3191 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3192 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3193 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3194 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3195 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3196 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3197 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3198 PTA_SANDYBRIDGE},
3199 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3200 PTA_SANDYBRIDGE},
3201 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3202 PTA_IVYBRIDGE},
3203 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3204 PTA_IVYBRIDGE},
3205 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3206 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3207 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3208 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3209 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3210 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3211 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3212 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3213 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3214 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3215 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3216 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3217 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3218 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3219 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3220 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3221 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3222 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3223 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3224 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3225 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3226 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3227 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3228 {"x86-64", PROCESSOR_K8, CPU_K8,
3229 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3230 {"k8", PROCESSOR_K8, CPU_K8,
3231 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3232 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3233 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3234 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3235 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3236 {"opteron", PROCESSOR_K8, CPU_K8,
3237 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3238 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3239 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3240 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3241 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3242 {"athlon64", PROCESSOR_K8, CPU_K8,
3243 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3244 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3245 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3246 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3247 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3248 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3249 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3250 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3251 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3252 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3253 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3254 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3255 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3256 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3257 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3258 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3259 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3260 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3261 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3262 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3263 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3264 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3265 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3266 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3267 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3268 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3269 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3270 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3271 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3272 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3273 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3274 | PTA_XSAVEOPT | PTA_FSGSBASE},
3275 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3276 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3277 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3278 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3279 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3280 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3281 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3282 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3285 | PTA_FXSR | PTA_XSAVE},
3286 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3287 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3288 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3289 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3290 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3291 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3293 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3294 PTA_64BIT
3295 | PTA_HLE /* flags are only used for -march switch. */ },
3298 /* -mrecip options. */
3299 static struct
3301 const char *string; /* option name */
3302 unsigned int mask; /* mask bits to set */
3304 const recip_options[] =
3306 { "all", RECIP_MASK_ALL },
3307 { "none", RECIP_MASK_NONE },
3308 { "div", RECIP_MASK_DIV },
3309 { "sqrt", RECIP_MASK_SQRT },
3310 { "vec-div", RECIP_MASK_VEC_DIV },
3311 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3314 int const pta_size = ARRAY_SIZE (processor_alias_table);
3316 /* Set up prefix/suffix so the error messages refer to either the command
3317 line argument, or the attribute(target). */
3318 if (main_args_p)
3320 prefix = "-m";
3321 suffix = "";
3322 sw = "switch";
3324 else
3326 prefix = "option(\"";
3327 suffix = "\")";
3328 sw = "attribute";
3331 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3332 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3333 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3334 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3335 #ifdef TARGET_BI_ARCH
3336 else
3338 #if TARGET_BI_ARCH == 1
3339 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3340 is on and OPTION_MASK_ABI_X32 is off. We turn off
3341 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3342 -mx32. */
3343 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3344 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3345 #else
3346 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3347 on and OPTION_MASK_ABI_64 is off. We turn off
3348 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3349 -m64. */
3350 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3351 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3352 #endif
3354 #endif
3356 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3358 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3359 OPTION_MASK_ABI_64 for TARGET_X32. */
3360 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3361 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3363 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3364 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3365 | OPTION_MASK_ABI_X32
3366 | OPTION_MASK_ABI_64);
3367 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3369 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3370 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3371 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3372 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3375 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3376 SUBTARGET_OVERRIDE_OPTIONS;
3377 #endif
3379 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3380 SUBSUBTARGET_OVERRIDE_OPTIONS;
3381 #endif
3383 /* -fPIC is the default for 64-bit Mach-O (Darwin) targets. */
3384 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3385 opts->x_flag_pic = 2;
3387 /* Need to check -mtune=generic first. */
3388 if (opts->x_ix86_tune_string)
3390 /* As special support for cross compilers we read -mtune=native
3391 as -mtune=generic. With native compilers we won't see the
3392 -mtune=native, as it was changed by the driver. */
3393 if (!strcmp (opts->x_ix86_tune_string, "native"))
3395 opts->x_ix86_tune_string = "generic";
3397 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3398 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3399 "%stune=k8%s or %stune=generic%s instead as appropriate",
3400 prefix, suffix, prefix, suffix, prefix, suffix);
3402 else
3404 if (opts->x_ix86_arch_string)
3405 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3406 if (!opts->x_ix86_tune_string)
3408 opts->x_ix86_tune_string
3409 = processor_target_table[TARGET_CPU_DEFAULT].name;
3410 ix86_tune_defaulted = 1;
3413 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3414 or defaulted. We need to use a sensible tune option. */
3415 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3417 opts->x_ix86_tune_string = "generic";
3421 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3422 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3424 /* rep; movq isn't available in 32-bit code. */
3425 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3426 opts->x_ix86_stringop_alg = no_stringop;
3429 if (!opts->x_ix86_arch_string)
3430 opts->x_ix86_arch_string
3431 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3432 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3433 else
3434 ix86_arch_specified = 1;
3436 if (opts_set->x_ix86_pmode)
3438 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3439 && opts->x_ix86_pmode == PMODE_SI)
3440 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3441 && opts->x_ix86_pmode == PMODE_DI))
3442 error ("address mode %qs not supported in the %s bit mode",
3443 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3444 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3446 else
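/* Default Pmode: 64-bit pointers only for LP64; x32 and 32-bit code use
SImode pointers.  */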
3447 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3448 ? PMODE_DI : PMODE_SI;
3450 if (!opts_set->x_ix86_abi)
3451 opts->x_ix86_abi = DEFAULT_ABI;
3453 /* For targets using the MS ABI, enable ms-extensions if not
3454 explicitly turned off. For non-MS ABIs we turn off this
3455 option. */
3456 if (!opts_set->x_flag_ms_extensions)
3457 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3459 if (opts_set->x_ix86_cmodel)
3461 switch (opts->x_ix86_cmodel)
3463 case CM_SMALL:
3464 case CM_SMALL_PIC:
3465 if (opts->x_flag_pic)
3466 opts->x_ix86_cmodel = CM_SMALL_PIC;
3467 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3468 error ("code model %qs not supported in the %s bit mode",
3469 "small", "32");
3470 break;
3472 case CM_MEDIUM:
3473 case CM_MEDIUM_PIC:
3474 if (opts->x_flag_pic)
3475 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3476 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3477 error ("code model %qs not supported in the %s bit mode",
3478 "medium", "32");
3479 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3480 error ("code model %qs not supported in x32 mode",
3481 "medium");
3482 break;
3484 case CM_LARGE:
3485 case CM_LARGE_PIC:
3486 if (opts->x_flag_pic)
3487 opts->x_ix86_cmodel = CM_LARGE_PIC;
3488 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3489 error ("code model %qs not supported in the %s bit mode",
3490 "large", "32");
3491 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3492 error ("code model %qs not supported in x32 mode",
3493 "large");
3494 break;
3496 case CM_32:
3497 if (opts->x_flag_pic)
3498 error ("code model %s does not support PIC mode", "32");
3499 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3500 error ("code model %qs not supported in the %s bit mode",
3501 "32", "64");
3502 break;
3504 case CM_KERNEL:
3505 if (opts->x_flag_pic)
3507 error ("code model %s does not support PIC mode", "kernel");
3508 opts->x_ix86_cmodel = CM_32;
3510 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3511 error ("code model %qs not supported in the %s bit mode",
3512 "kernel", "32");
3513 break;
3515 default:
3516 gcc_unreachable ();
3519 else
3521 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3522 use of rip-relative addressing. This eliminates fixups that
3523 would otherwise be needed if this object is to be placed in a
3524 DLL, and is essentially just as efficient as direct addressing. */
3525 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3526 && (TARGET_RDOS || TARGET_PECOFF))
3527 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3528 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3529 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3530 else
3531 opts->x_ix86_cmodel = CM_32;
3533 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3535 error ("-masm=intel not supported in this configuration");
3536 opts->x_ix86_asm_dialect = ASM_ATT;
3538 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3539 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3540 sorry ("%i-bit mode not compiled in",
3541 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3543 for (i = 0; i < pta_size; i++)
3544 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3546 ix86_schedule = processor_alias_table[i].schedule;
3547 ix86_arch = processor_alias_table[i].processor;
3548 /* Default cpu tuning to the architecture. */
3549 ix86_tune = ix86_arch;
3551 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3552 && !(processor_alias_table[i].flags & PTA_64BIT))
3553 error ("CPU you selected does not support x86-64 "
3554 "instruction set");
3556 if (processor_alias_table[i].flags & PTA_MMX
3557 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3558 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3559 if (processor_alias_table[i].flags & PTA_3DNOW
3560 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3561 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3562 if (processor_alias_table[i].flags & PTA_3DNOW_A
3563 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3565 if (processor_alias_table[i].flags & PTA_SSE
3566 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3567 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3568 if (processor_alias_table[i].flags & PTA_SSE2
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3571 if (processor_alias_table[i].flags & PTA_SSE3
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3574 if (processor_alias_table[i].flags & PTA_SSSE3
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3577 if (processor_alias_table[i].flags & PTA_SSE4_1
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3580 if (processor_alias_table[i].flags & PTA_SSE4_2
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3583 if (processor_alias_table[i].flags & PTA_AVX
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3586 if (processor_alias_table[i].flags & PTA_AVX2
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3589 if (processor_alias_table[i].flags & PTA_FMA
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3592 if (processor_alias_table[i].flags & PTA_SSE4A
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3595 if (processor_alias_table[i].flags & PTA_FMA4
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3598 if (processor_alias_table[i].flags & PTA_XOP
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3601 if (processor_alias_table[i].flags & PTA_LWP
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3604 if (processor_alias_table[i].flags & PTA_ABM
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3607 if (processor_alias_table[i].flags & PTA_BMI
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3610 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3613 if (processor_alias_table[i].flags & PTA_TBM
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3616 if (processor_alias_table[i].flags & PTA_BMI2
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3619 if (processor_alias_table[i].flags & PTA_CX16
3620 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3621 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3622 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3623 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3624 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3625 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3626 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3629 if (processor_alias_table[i].flags & PTA_MOVBE
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3632 if (processor_alias_table[i].flags & PTA_AES
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3635 if (processor_alias_table[i].flags & PTA_SHA
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3638 if (processor_alias_table[i].flags & PTA_PCLMUL
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3641 if (processor_alias_table[i].flags & PTA_FSGSBASE
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3644 if (processor_alias_table[i].flags & PTA_RDRND
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3647 if (processor_alias_table[i].flags & PTA_F16C
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3650 if (processor_alias_table[i].flags & PTA_RTM
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3653 if (processor_alias_table[i].flags & PTA_HLE
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3656 if (processor_alias_table[i].flags & PTA_PRFCHW
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3659 if (processor_alias_table[i].flags & PTA_RDSEED
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3662 if (processor_alias_table[i].flags & PTA_ADX
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3665 if (processor_alias_table[i].flags & PTA_FXSR
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3668 if (processor_alias_table[i].flags & PTA_XSAVE
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3671 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3674 if (processor_alias_table[i].flags & PTA_AVX512F
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3677 if (processor_alias_table[i].flags & PTA_AVX512ER
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3680 if (processor_alias_table[i].flags & PTA_AVX512PF
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3683 if (processor_alias_table[i].flags & PTA_AVX512CD
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3686 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3689 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3692 if (processor_alias_table[i].flags & PTA_XSAVEC
3693 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3694 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3695 if (processor_alias_table[i].flags & PTA_XSAVES
3696 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3697 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3698 if (processor_alias_table[i].flags & PTA_AVX512DQ
3699 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3700 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3701 if (processor_alias_table[i].flags & PTA_AVX512BW
3702 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3703 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3704 if (processor_alias_table[i].flags & PTA_AVX512VL
3705 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3706 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3707 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3708 x86_prefetch_sse = true;
3710 break;
3713 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3714 error ("generic CPU can be used only for %stune=%s %s",
3715 prefix, suffix, sw);
3716 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3717 error ("intel CPU can be used only for %stune=%s %s",
3718 prefix, suffix, sw);
3719 else if (i == pta_size)
3720 error ("bad value (%s) for %sarch=%s %s",
3721 opts->x_ix86_arch_string, prefix, suffix, sw);
3723 ix86_arch_mask = 1u << ix86_arch;
3724 for (i = 0; i < X86_ARCH_LAST; ++i)
3725 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3727 for (i = 0; i < pta_size; i++)
3728 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3730 ix86_schedule = processor_alias_table[i].schedule;
3731 ix86_tune = processor_alias_table[i].processor;
3732 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3734 if (!(processor_alias_table[i].flags & PTA_64BIT))
3736 if (ix86_tune_defaulted)
3738 opts->x_ix86_tune_string = "x86-64";
3739 for (i = 0; i < pta_size; i++)
3740 if (! strcmp (opts->x_ix86_tune_string,
3741 processor_alias_table[i].name))
3742 break;
3743 ix86_schedule = processor_alias_table[i].schedule;
3744 ix86_tune = processor_alias_table[i].processor;
3746 else
3747 error ("CPU you selected does not support x86-64 "
3748 "instruction set");
3751 /* Intel CPUs have always interpreted SSE prefetch instructions as
3752 NOPs; so, we can enable SSE prefetch instructions even when
3753 -mtune (rather than -march) points us to a processor that has them.
3754 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3755 higher processors. */
3756 if (TARGET_CMOV
3757 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3758 x86_prefetch_sse = true;
3759 break;
3762 if (ix86_tune_specified && i == pta_size)
3763 error ("bad value (%s) for %stune=%s %s",
3764 opts->x_ix86_tune_string, prefix, suffix, sw);
3766 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3768 #ifndef USE_IX86_FRAME_POINTER
3769 #define USE_IX86_FRAME_POINTER 0
3770 #endif
3772 #ifndef USE_X86_64_FRAME_POINTER
3773 #define USE_X86_64_FRAME_POINTER 0
3774 #endif
3776 /* Set the default values for switches whose default depends on TARGET_64BIT
3777 in case they weren't overwritten by command line options. */
3778 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3780 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3781 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3782 if (opts->x_flag_asynchronous_unwind_tables
3783 && !opts_set->x_flag_unwind_tables
3784 && TARGET_64BIT_MS_ABI)
3785 opts->x_flag_unwind_tables = 1;
3786 if (opts->x_flag_asynchronous_unwind_tables == 2)
3787 opts->x_flag_unwind_tables
3788 = opts->x_flag_asynchronous_unwind_tables = 1;
3789 if (opts->x_flag_pcc_struct_return == 2)
3790 opts->x_flag_pcc_struct_return = 0;
3792 else
3794 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3795 opts->x_flag_omit_frame_pointer
3796 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3797 if (opts->x_flag_asynchronous_unwind_tables == 2)
3798 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3799 if (opts->x_flag_pcc_struct_return == 2)
3800 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3803 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3804 if (opts->x_optimize_size)
3805 ix86_cost = &ix86_size_cost;
3806 else
3807 ix86_cost = ix86_tune_cost;
3809 /* Arrange to set up i386_stack_locals for all functions. */
3810 init_machine_status = ix86_init_machine_status;
3812 /* Validate -mregparm= value. */
3813 if (opts_set->x_ix86_regparm)
3815 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3816 warning (0, "-mregparm is ignored in 64-bit mode");
3817 if (opts->x_ix86_regparm > REGPARM_MAX)
3819 error ("-mregparm=%d is not between 0 and %d",
3820 opts->x_ix86_regparm, REGPARM_MAX);
3821 opts->x_ix86_regparm = 0;
3824 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3825 opts->x_ix86_regparm = REGPARM_MAX;
3827 /* Default align_* from the processor table. */
3828 if (opts->x_align_loops == 0)
3830 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3831 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3833 if (opts->x_align_jumps == 0)
3835 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3836 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3838 if (opts->x_align_functions == 0)
3840 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3843 /* Provide default for -mbranch-cost= value. */
3844 if (!opts_set->x_ix86_branch_cost)
3845 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3847 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3849 opts->x_target_flags
3850 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3852 /* Enable by default the SSE and MMX builtins. Do allow the user to
3853 explicitly disable any of these. In particular, disabling SSE and
3854 MMX for kernel code is extremely useful. */
3855 if (!ix86_arch_specified)
3856 opts->x_ix86_isa_flags
3857 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3858 | TARGET_SUBTARGET64_ISA_DEFAULT)
3859 & ~opts->x_ix86_isa_flags_explicit);
3861 if (TARGET_RTD_P (opts->x_target_flags))
3862 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3864 else
3866 opts->x_target_flags
3867 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3869 if (!ix86_arch_specified)
3870 opts->x_ix86_isa_flags
3871 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3873 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3874 when the programmer takes care to keep the stack from being destroyed. */
3875 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3876 opts->x_target_flags |= MASK_NO_RED_ZONE;
3879 /* Keep nonleaf frame pointers. */
3880 if (opts->x_flag_omit_frame_pointer)
3881 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3882 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3883 opts->x_flag_omit_frame_pointer = 1;
3885 /* If we're doing fast math, we don't care about comparison order
3886 wrt NaNs. This lets us use a shorter comparison sequence. */
3887 if (opts->x_flag_finite_math_only)
3888 opts->x_target_flags &= ~MASK_IEEE_FP;
3890 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3891 since the insns won't need emulation. */
3892 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3893 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3895 /* Likewise, if the target doesn't have a 387, or we've specified
3896 software floating point, don't use 387 inline intrinsics. */
3897 if (!TARGET_80387_P (opts->x_target_flags))
3898 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3900 /* Turn on MMX builtins for -msse. */
3901 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3902 opts->x_ix86_isa_flags
3903 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3905 /* Enable SSE prefetch. */
3906 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3907 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3908 x86_prefetch_sse = true;
3910 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3911 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3912 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3913 opts->x_ix86_isa_flags
3914 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3916 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3917 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3918 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3919 opts->x_ix86_isa_flags
3920 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3922 /* Enable lzcnt instruction for -mabm. */
3923 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3924 opts->x_ix86_isa_flags
3925 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3927 /* Validate -mpreferred-stack-boundary= value or default it to
3928 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3929 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3930 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3932 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3933 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3934 int max = (TARGET_SEH ? 4 : 12);
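/* The boundary argument is log2 of the alignment in bytes, e.g. 4 means a
16-byte (128-bit) boundary; the minimum is 4 for 64-bit code with SSE,
3 (8 bytes) without SSE, and 2 (4 bytes) for 32-bit code.  */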
3936 if (opts->x_ix86_preferred_stack_boundary_arg < min
3937 || opts->x_ix86_preferred_stack_boundary_arg > max)
3939 if (min == max)
3940 error ("-mpreferred-stack-boundary is not supported "
3941 "for this target");
3942 else
3943 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3944 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3946 else
3947 ix86_preferred_stack_boundary
3948 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3951 /* Set the default value for -mstackrealign. */
3952 if (opts->x_ix86_force_align_arg_pointer == -1)
3953 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3955 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3957 /* Validate -mincoming-stack-boundary= value or default it to
3958 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3959 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3960 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3962 if (opts->x_ix86_incoming_stack_boundary_arg
3963 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3964 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3965 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3966 opts->x_ix86_incoming_stack_boundary_arg,
3967 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3968 else
3970 ix86_user_incoming_stack_boundary
3971 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3972 ix86_incoming_stack_boundary
3973 = ix86_user_incoming_stack_boundary;
3977 /* Accept -msseregparm only if at least SSE support is enabled. */
3978 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3979 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3980 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3982 if (opts_set->x_ix86_fpmath)
3984 if (opts->x_ix86_fpmath & FPMATH_SSE)
3986 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3988 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3989 opts->x_ix86_fpmath = FPMATH_387;
3991 else if ((opts->x_ix86_fpmath & FPMATH_387)
3992 && !TARGET_80387_P (opts->x_target_flags))
3994 warning (0, "387 instruction set disabled, using SSE arithmetics");
3995 opts->x_ix86_fpmath = FPMATH_SSE;
3999 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4000 -mfpmath=387. The latter is however the default on many targets, since the
4001 extra 80-bit precision of temporaries is considered to be part of the ABI.
4002 Overwrite the default at least for -ffast-math.
4003 TODO: -mfpmath=both seems to produce similarly performing code with slightly
4004 smaller binaries. It is however not clear whether register allocation is
4005 ready for this setting.
4006 Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4007 codegen. We may switch to 387 with -ffast-math for size-optimized
4008 functions. */
4009 else if (fast_math_flags_set_p (&global_options)
4010 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4011 opts->x_ix86_fpmath = FPMATH_SSE;
4012 else
4013 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4015 /* If the i387 is disabled, then do not return values in it. */
4016 if (!TARGET_80387_P (opts->x_target_flags))
4017 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4019 /* Use an external vectorized library when vectorizing intrinsics. */
4020 if (opts_set->x_ix86_veclibabi_type)
4021 switch (opts->x_ix86_veclibabi_type)
4023 case ix86_veclibabi_type_svml:
4024 ix86_veclib_handler = ix86_veclibabi_svml;
4025 break;
4027 case ix86_veclibabi_type_acml:
4028 ix86_veclib_handler = ix86_veclibabi_acml;
4029 break;
4031 default:
4032 gcc_unreachable ();
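/* Honor the tuning preference for accumulating outgoing arguments unless
the user chose explicitly or we are optimizing for size.  */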
4035 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4036 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4037 && !opts->x_optimize_size)
4038 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4040 /* If stack probes are required, the space used for large function
4041 arguments on the stack must also be probed, so enable
4042 -maccumulate-outgoing-args so this happens in the prologue. */
4043 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4044 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4046 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4047 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4048 "for correctness", prefix, suffix);
4049 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4052 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4054 char *p;
4055 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4056 p = strchr (internal_label_prefix, 'X');
4057 internal_label_prefix_len = p - internal_label_prefix;
4058 *p = '\0';
4061 /* When no scheduling description is available, disable the scheduler pass
4062 so it won't slow down compilation and make x87 code slower. */
4063 if (!TARGET_SCHEDULE)
4064 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4066 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4067 ix86_tune_cost->simultaneous_prefetches,
4068 opts->x_param_values,
4069 opts_set->x_param_values);
4070 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4071 ix86_tune_cost->prefetch_block,
4072 opts->x_param_values,
4073 opts_set->x_param_values);
4074 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4075 ix86_tune_cost->l1_cache_size,
4076 opts->x_param_values,
4077 opts_set->x_param_values);
4078 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4079 ix86_tune_cost->l2_cache_size,
4080 opts->x_param_values,
4081 opts_set->x_param_values);
4083 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4084 if (opts->x_flag_prefetch_loop_arrays < 0
4085 && HAVE_prefetch
4086 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4087 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4088 opts->x_flag_prefetch_loop_arrays = 1;
4090 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4091 can be optimized to ap = __builtin_next_arg (0). */
4092 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4093 targetm.expand_builtin_va_start = NULL;
4095 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4097 ix86_gen_leave = gen_leave_rex64;
4098 if (Pmode == DImode)
4100 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4101 ix86_gen_tls_local_dynamic_base_64
4102 = gen_tls_local_dynamic_base_64_di;
4104 else
4106 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4107 ix86_gen_tls_local_dynamic_base_64
4108 = gen_tls_local_dynamic_base_64_si;
4111 else
4112 ix86_gen_leave = gen_leave;
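/* Pick DImode or SImode insn generators based on Pmode rather than
TARGET_64BIT, since x32 uses SImode pointers even in 64-bit mode.  */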
4114 if (Pmode == DImode)
4116 ix86_gen_add3 = gen_adddi3;
4117 ix86_gen_sub3 = gen_subdi3;
4118 ix86_gen_sub3_carry = gen_subdi3_carry;
4119 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4120 ix86_gen_andsp = gen_anddi3;
4121 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4122 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4123 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4124 ix86_gen_monitor = gen_sse3_monitor_di;
4126 else
4128 ix86_gen_add3 = gen_addsi3;
4129 ix86_gen_sub3 = gen_subsi3;
4130 ix86_gen_sub3_carry = gen_subsi3_carry;
4131 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4132 ix86_gen_andsp = gen_andsi3;
4133 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4134 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4135 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4136 ix86_gen_monitor = gen_sse3_monitor_si;
4139 #ifdef USE_IX86_CLD
4140 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4141 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4142 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4143 #endif
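/* -mfentry emits the profiling call before the prologue; the checks below
reject it for 32-bit PIC code and force it on for SEH targets.  */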
4145 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4147 if (opts->x_flag_fentry > 0)
4148 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4149 "with -fpic");
4150 opts->x_flag_fentry = 0;
4152 else if (TARGET_SEH)
4154 if (opts->x_flag_fentry == 0)
4155 sorry ("-mno-fentry isn%'t compatible with SEH");
4156 opts->x_flag_fentry = 1;
4158 else if (opts->x_flag_fentry < 0)
4160 #if defined(PROFILE_BEFORE_PROLOGUE)
4161 opts->x_flag_fentry = 1;
4162 #else
4163 opts->x_flag_fentry = 0;
4164 #endif
4167 /* When not optimizing for size, enable vzeroupper optimization for
4168 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4169 AVX unaligned load/store. */
4170 if (!opts->x_optimize_size)
4172 if (flag_expensive_optimizations
4173 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4174 opts->x_target_flags |= MASK_VZEROUPPER;
4175 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4176 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4177 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4178 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4179 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4180 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4181 /* Enable 128-bit AVX instruction generation
4182 for the auto-vectorizer. */
4183 if (TARGET_AVX128_OPTIMAL
4184 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4185 opts->x_target_flags |= MASK_PREFER_AVX128;
4188 if (opts->x_ix86_recip_name)
4190 char *p = ASTRDUP (opts->x_ix86_recip_name);
4191 char *q;
4192 unsigned int mask, i;
4193 bool invert;
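/* -mrecip= takes a comma-separated list of the option names above; a
leading '!' clears the corresponding mask bits, and "default" stands for
RECIP_MASK_ALL.  */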
4195 while ((q = strtok (p, ",")) != NULL)
4197 p = NULL;
4198 if (*q == '!')
4200 invert = true;
4201 q++;
4203 else
4204 invert = false;
4206 if (!strcmp (q, "default"))
4207 mask = RECIP_MASK_ALL;
4208 else
4210 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4211 if (!strcmp (q, recip_options[i].string))
4213 mask = recip_options[i].mask;
4214 break;
4217 if (i == ARRAY_SIZE (recip_options))
4219 error ("unknown option for -mrecip=%s", q);
4220 invert = false;
4221 mask = RECIP_MASK_NONE;
4225 opts->x_recip_mask_explicit |= mask;
4226 if (invert)
4227 opts->x_recip_mask &= ~mask;
4228 else
4229 opts->x_recip_mask |= mask;
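/* Plain -mrecip / -mno-recip then set or clear every bit not already fixed
by an explicit -mrecip= entry.  */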
4233 if (TARGET_RECIP_P (opts->x_target_flags))
4234 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4235 else if (opts_set->x_target_flags & MASK_RECIP)
4236 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4238 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4239 for 64-bit Bionic. */
4240 if (TARGET_HAS_BIONIC
4241 && !(opts_set->x_target_flags
4242 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4243 opts->x_target_flags |= (TARGET_64BIT
4244 ? MASK_LONG_DOUBLE_128
4245 : MASK_LONG_DOUBLE_64);
4247 /* Only one of them can be active. */
4248 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4249 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4251 /* Save the initial options in case the user does function specific
4252 options. */
4253 if (main_args_p)
4254 target_option_default_node = target_option_current_node
4255 = build_target_option_node (opts);
4257 /* Handle stack protector */
4258 if (!opts_set->x_ix86_stack_protector_guard)
4259 opts->x_ix86_stack_protector_guard
4260 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4262 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4263 if (opts->x_ix86_tune_memcpy_strategy)
4265 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4266 ix86_parse_stringop_strategy_string (str, false);
4267 free (str);
4270 if (opts->x_ix86_tune_memset_strategy)
4272 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4273 ix86_parse_stringop_strategy_string (str, true);
4274 free (str);
4278 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4280 static void
4281 ix86_option_override (void)
4283 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4284 static struct register_pass_info insert_vzeroupper_info
4285 = { pass_insert_vzeroupper, "reload",
4286 1, PASS_POS_INSERT_AFTER
4289 ix86_option_override_internal (true, &global_options, &global_options_set);
4292 /* This needs to be done at start up. It's convenient to do it here. */
4293 register_pass (&insert_vzeroupper_info);
4296 /* Update register usage after having seen the compiler flags. */
4298 static void
4299 ix86_conditional_register_usage (void)
4301 int i, c_mask;
4302 unsigned int j;
4304 /* The PIC register, if it exists, is fixed. */
4305 j = PIC_OFFSET_TABLE_REGNUM;
4306 if (j != INVALID_REGNUM)
4307 fixed_regs[j] = call_used_regs[j] = 1;
4309 /* For 32-bit targets, squash the REX registers. */
4310 if (! TARGET_64BIT)
4312 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4313 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4314 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4315 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4316 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4317 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4320 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4321 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4322 : TARGET_64BIT ? (1 << 2)
4323 : (1 << 1));
4325 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4327 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4329 /* Set/reset conditionally defined registers from
4330 CALL_USED_REGISTERS initializer. */
4331 if (call_used_regs[i] > 1)
4332 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4334 /* Calculate registers of CLOBBERED_REGS register set
4335 as call used registers from GENERAL_REGS register set. */
4336 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4337 && call_used_regs[i])
4338 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4341 /* If MMX is disabled, squash the registers. */
4342 if (! TARGET_MMX)
4343 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4344 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4345 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4347 /* If SSE is disabled, squash the registers. */
4348 if (! TARGET_SSE)
4349 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4350 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4351 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4353 /* If the FPU is disabled, squash the registers. */
4354 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4355 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4356 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4357 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4359 /* If AVX512F is disabled, squash the registers. */
4360 if (! TARGET_AVX512F)
4362 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4363 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4365 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4366 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4371 /* Save the current options */
4373 static void
4374 ix86_function_specific_save (struct cl_target_option *ptr,
4375 struct gcc_options *opts)
4377 ptr->arch = ix86_arch;
4378 ptr->schedule = ix86_schedule;
4379 ptr->tune = ix86_tune;
4380 ptr->branch_cost = ix86_branch_cost;
4381 ptr->tune_defaulted = ix86_tune_defaulted;
4382 ptr->arch_specified = ix86_arch_specified;
4383 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4384 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4385 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4386 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4387 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4388 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4389 ptr->x_ix86_abi = opts->x_ix86_abi;
4390 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4391 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4392 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4393 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4394 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4395 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4396 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4397 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4398 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4399 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4400 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4401 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4402 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4403 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4404 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4405 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4406 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4407 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4408 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4409 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4411 /* The fields are char but the variables are not; make sure the
4412 values fit in the fields. */
4413 gcc_assert (ptr->arch == ix86_arch);
4414 gcc_assert (ptr->schedule == ix86_schedule);
4415 gcc_assert (ptr->tune == ix86_tune);
4416 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4419 /* Restore the current options */
4421 static void
4422 ix86_function_specific_restore (struct gcc_options *opts,
4423 struct cl_target_option *ptr)
4425 enum processor_type old_tune = ix86_tune;
4426 enum processor_type old_arch = ix86_arch;
4427 unsigned int ix86_arch_mask;
4428 int i;
4430 /* We don't change -fPIC. */
4431 opts->x_flag_pic = flag_pic;
4433 ix86_arch = (enum processor_type) ptr->arch;
4434 ix86_schedule = (enum attr_cpu) ptr->schedule;
4435 ix86_tune = (enum processor_type) ptr->tune;
4436 opts->x_ix86_branch_cost = ptr->branch_cost;
4437 ix86_tune_defaulted = ptr->tune_defaulted;
4438 ix86_arch_specified = ptr->arch_specified;
4439 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4440 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4441 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4442 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4443 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4444 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4445 opts->x_ix86_abi = ptr->x_ix86_abi;
4446 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4447 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4448 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4449 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4450 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4451 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4452 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4453 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4454 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4455 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4456 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4457 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4458 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4459 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4460 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4461 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4462 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4463 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4464 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4465 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4467 /* Recreate the arch feature tests if the arch changed */
4468 if (old_arch != ix86_arch)
4470 ix86_arch_mask = 1u << ix86_arch;
4471 for (i = 0; i < X86_ARCH_LAST; ++i)
4472 ix86_arch_features[i]
4473 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4476 /* Recreate the tune optimization tests */
4477 if (old_tune != ix86_tune)
4478 set_ix86_tune_features (ix86_tune, false);
4481 /* Print the current options */
4483 static void
4484 ix86_function_specific_print (FILE *file, int indent,
4485 struct cl_target_option *ptr)
4487 char *target_string
4488 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4489 NULL, NULL, ptr->x_ix86_fpmath, false);
4491 gcc_assert (ptr->arch < PROCESSOR_max);
4492 fprintf (file, "%*sarch = %d (%s)\n",
4493 indent, "",
4494 ptr->arch, processor_target_table[ptr->arch].name);
4496 gcc_assert (ptr->tune < PROCESSOR_max);
4497 fprintf (file, "%*stune = %d (%s)\n",
4498 indent, "",
4499 ptr->tune, processor_target_table[ptr->tune].name);
4501 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4503 if (target_string)
4505 fprintf (file, "%*s%s\n", indent, "", target_string);
4506 free (target_string);
4511 /* Inner function to process the attribute((target(...))), take an argument and
4512 set the current options from the argument. If we have a list, recursively go
4513 over the list. */
4515 static bool
4516 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4517 struct gcc_options *opts,
4518 struct gcc_options *opts_set,
4519 struct gcc_options *enum_opts_set)
4521 char *next_optstr;
4522 bool ret = true;
4524 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4525 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4526 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4527 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4528 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4530 enum ix86_opt_type
4532 ix86_opt_unknown,
4533 ix86_opt_yes,
4534 ix86_opt_no,
4535 ix86_opt_str,
4536 ix86_opt_enum,
4537 ix86_opt_isa
4540 static const struct
4542 const char *string;
4543 size_t len;
4544 enum ix86_opt_type type;
4545 int opt;
4546 int mask;
4547 } attrs[] = {
4548 /* isa options */
4549 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4550 IX86_ATTR_ISA ("abm", OPT_mabm),
4551 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4552 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4553 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4554 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4555 IX86_ATTR_ISA ("aes", OPT_maes),
4556 IX86_ATTR_ISA ("sha", OPT_msha),
4557 IX86_ATTR_ISA ("avx", OPT_mavx),
4558 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4559 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4560 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4561 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4562 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4563 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4564 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4565 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4566 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4567 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4568 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4569 IX86_ATTR_ISA ("sse", OPT_msse),
4570 IX86_ATTR_ISA ("sse2", OPT_msse2),
4571 IX86_ATTR_ISA ("sse3", OPT_msse3),
4572 IX86_ATTR_ISA ("sse4", OPT_msse4),
4573 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4574 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4575 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4576 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4577 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4578 IX86_ATTR_ISA ("fma", OPT_mfma),
4579 IX86_ATTR_ISA ("xop", OPT_mxop),
4580 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4581 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4582 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4583 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4584 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4585 IX86_ATTR_ISA ("hle", OPT_mhle),
4586 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4587 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4588 IX86_ATTR_ISA ("adx", OPT_madx),
4589 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4590 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4591 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4592 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4593 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4594 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4595 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4597 /* enum options */
4598 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4600 /* string options */
4601 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4602 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4604 /* flag options */
4605 IX86_ATTR_YES ("cld",
4606 OPT_mcld,
4607 MASK_CLD),
4609 IX86_ATTR_NO ("fancy-math-387",
4610 OPT_mfancy_math_387,
4611 MASK_NO_FANCY_MATH_387),
4613 IX86_ATTR_YES ("ieee-fp",
4614 OPT_mieee_fp,
4615 MASK_IEEE_FP),
4617 IX86_ATTR_YES ("inline-all-stringops",
4618 OPT_minline_all_stringops,
4619 MASK_INLINE_ALL_STRINGOPS),
4621 IX86_ATTR_YES ("inline-stringops-dynamically",
4622 OPT_minline_stringops_dynamically,
4623 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4625 IX86_ATTR_NO ("align-stringops",
4626 OPT_mno_align_stringops,
4627 MASK_NO_ALIGN_STRINGOPS),
4629 IX86_ATTR_YES ("recip",
4630 OPT_mrecip,
4631 MASK_RECIP),
4635 /* If this is a list, recurse to get the options. */
4636 if (TREE_CODE (args) == TREE_LIST)
4638 bool ret = true;
4640 for (; args; args = TREE_CHAIN (args))
4641 if (TREE_VALUE (args)
4642 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4643 p_strings, opts, opts_set,
4644 enum_opts_set))
4645 ret = false;
4647 return ret;
4650 else if (TREE_CODE (args) != STRING_CST)
4652 error ("attribute %<target%> argument not a string");
4653 return false;
4656 /* Handle multiple arguments separated by commas. */
4657 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4659 while (next_optstr && *next_optstr != '\0')
4661 char *p = next_optstr;
4662 char *orig_p = p;
4663 char *comma = strchr (next_optstr, ',');
4664 const char *opt_string;
4665 size_t len, opt_len;
4666 int opt;
4667 bool opt_set_p;
4668 char ch;
4669 unsigned i;
4670 enum ix86_opt_type type = ix86_opt_unknown;
4671 int mask = 0;
4673 if (comma)
4675 *comma = '\0';
4676 len = comma - next_optstr;
4677 next_optstr = comma + 1;
4679 else
4681 len = strlen (p);
4682 next_optstr = NULL;
4685 /* Recognize no-xxx. */
4686 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4688 opt_set_p = false;
4689 p += 3;
4690 len -= 3;
4692 else
4693 opt_set_p = true;
4695 /* Find the option. */
4696 ch = *p;
4697 opt = N_OPTS;
4698 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4700 type = attrs[i].type;
4701 opt_len = attrs[i].len;
4702 if (ch == attrs[i].string[0]
4703 && ((type != ix86_opt_str && type != ix86_opt_enum)
4704 ? len == opt_len
4705 : len > opt_len)
4706 && memcmp (p, attrs[i].string, opt_len) == 0)
4708 opt = attrs[i].opt;
4709 mask = attrs[i].mask;
4710 opt_string = attrs[i].string;
4711 break;
4715 /* Process the option. */
4716 if (opt == N_OPTS)
4718 error ("attribute(target(\"%s\")) is unknown", orig_p);
4719 ret = false;
4722 else if (type == ix86_opt_isa)
4724 struct cl_decoded_option decoded;
4726 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4727 ix86_handle_option (opts, opts_set,
4728 &decoded, input_location);
4731 else if (type == ix86_opt_yes || type == ix86_opt_no)
4733 if (type == ix86_opt_no)
4734 opt_set_p = !opt_set_p;
4736 if (opt_set_p)
4737 opts->x_target_flags |= mask;
4738 else
4739 opts->x_target_flags &= ~mask;
4742 else if (type == ix86_opt_str)
4744 if (p_strings[opt])
4746 error ("option(\"%s\") was already specified", opt_string);
4747 ret = false;
4749 else
4750 p_strings[opt] = xstrdup (p + opt_len);
4753 else if (type == ix86_opt_enum)
4755 bool arg_ok;
4756 int value;
4758 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4759 if (arg_ok)
4760 set_option (opts, enum_opts_set, opt, value,
4761 p + opt_len, DK_UNSPECIFIED, input_location,
4762 global_dc);
4763 else
4765 error ("attribute(target(\"%s\")) is unknown", orig_p);
4766 ret = false;
4770 else
4771 gcc_unreachable ();
4774 return ret;
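/* Illustrative sketch (not part of GCC): the strings parsed above come from
   per-function target attributes.  Each comma-separated token is looked up
   in the attrs[] table, and a "no-" prefix clears the option.  The function
   name "hot_loop" is hypothetical.

     __attribute__((target("avx2,fma,no-sse4a")))
     static int hot_loop (const int *p, int n)
     {
       int i, sum = 0;
       for (i = 0; i < n; i++)
         sum += p[i];
       return sum;
     }
*/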
4777 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4779 tree
4780 ix86_valid_target_attribute_tree (tree args,
4781 struct gcc_options *opts,
4782 struct gcc_options *opts_set)
4784 const char *orig_arch_string = opts->x_ix86_arch_string;
4785 const char *orig_tune_string = opts->x_ix86_tune_string;
4786 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4787 int orig_tune_defaulted = ix86_tune_defaulted;
4788 int orig_arch_specified = ix86_arch_specified;
4789 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4790 tree t = NULL_TREE;
4791 int i;
4792 struct cl_target_option *def
4793 = TREE_TARGET_OPTION (target_option_default_node);
4794 struct gcc_options enum_opts_set;
4796 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4798 /* Process each of the options on the chain. */
4799 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4800 opts_set, &enum_opts_set))
4801 return error_mark_node;
4803 /* If the changed options are different from the default, rerun
4804 ix86_option_override_internal, and then save the options away.
4805 The string options are attribute options, and will be undone
4806 when we copy the save structure. */
4807 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4808 || opts->x_target_flags != def->x_target_flags
4809 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4810 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4811 || enum_opts_set.x_ix86_fpmath)
4813 /* If we are using the default tune= or arch=, undo the string assigned,
4814 and use the default. */
4815 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4816 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4817 else if (!orig_arch_specified)
4818 opts->x_ix86_arch_string = NULL;
4820 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4821 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4822 else if (orig_tune_defaulted)
4823 opts->x_ix86_tune_string = NULL;
4825 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4826 if (enum_opts_set.x_ix86_fpmath)
4827 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4828 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4829 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4831 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4832 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4835 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4836 ix86_option_override_internal (false, opts, opts_set);
4838 /* Add any builtin functions with the new isa if any. */
4839 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4841 /* Save the current options unless we are validating options for
4842 #pragma. */
4843 t = build_target_option_node (opts);
4845 opts->x_ix86_arch_string = orig_arch_string;
4846 opts->x_ix86_tune_string = orig_tune_string;
4847 opts_set->x_ix86_fpmath = orig_fpmath_set;
4849 /* Free up memory allocated to hold the strings */
4850 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4851 free (option_strings[i]);
4854 return t;
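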
4857 /* Hook to validate attribute((target("string"))). */
4859 static bool
4860 ix86_valid_target_attribute_p (tree fndecl,
4861 tree ARG_UNUSED (name),
4862 tree args,
4863 int ARG_UNUSED (flags))
4865 struct gcc_options func_options;
4866 tree new_target, new_optimize;
4867 bool ret = true;
4869 /* attribute((target("default"))) does nothing, beyond
4870 affecting multi-versioning. */
4871 if (TREE_VALUE (args)
4872 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4873 && TREE_CHAIN (args) == NULL_TREE
4874 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4875 return true;
4877 tree old_optimize = build_optimization_node (&global_options);
4879 /* Get the optimization options of the current function. */
4880 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4882 if (!func_optimize)
4883 func_optimize = old_optimize;
4885 /* Init func_options. */
4886 memset (&func_options, 0, sizeof (func_options));
4887 init_options_struct (&func_options, NULL);
4888 lang_hooks.init_options_struct (&func_options);
4890 cl_optimization_restore (&func_options,
4891 TREE_OPTIMIZATION (func_optimize));
4893 /* Initialize func_options to the default before its target options can
4894 be set. */
4895 cl_target_option_restore (&func_options,
4896 TREE_TARGET_OPTION (target_option_default_node));
4898 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4899 &global_options_set);
4901 new_optimize = build_optimization_node (&func_options);
4903 if (new_target == error_mark_node)
4904 ret = false;
4906 else if (fndecl && new_target)
4908 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4910 if (old_optimize != new_optimize)
4911 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4914 return ret;
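/* Illustrative sketch (not part of GCC): target ("default") matters only for
   function multi-versioning (a C++-only feature at this point), where the
   same function is defined several times with different target attributes
   and the "default" definition is the run-time fallback.  The function name
   "dispatchable" is hypothetical.

     __attribute__((target("default"))) int dispatchable (void) { return 0; }
     __attribute__((target("avx2")))    int dispatchable (void) { return 2; }
*/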
4918 /* Hook to determine if one function can safely inline another. */
4920 static bool
4921 ix86_can_inline_p (tree caller, tree callee)
4923 bool ret = false;
4924 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4925 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4927 /* If callee has no option attributes, then it is ok to inline. */
4928 if (!callee_tree)
4929 ret = true;
4931 /* If caller has no option attributes, but callee does then it is not ok to
4932 inline. */
4933 else if (!caller_tree)
4934 ret = false;
4936 else
4938 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4939 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4941 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4942 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4943 function. */
4944 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4945 != callee_opts->x_ix86_isa_flags)
4946 ret = false;
4948 /* See if we have the same non-isa options. */
4949 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4950 ret = false;
4952 /* See if arch, tune, etc. are the same. */
4953 else if (caller_opts->arch != callee_opts->arch)
4954 ret = false;
4956 else if (caller_opts->tune != callee_opts->tune)
4957 ret = false;
4959 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4960 ret = false;
4962 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4963 ret = false;
4965 else
4966 ret = true;
4969 return ret;
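/* Illustrative sketch (not part of GCC): with the subset rule above, an
   AVX2 caller may inline an SSE2 callee, but not the other way around.
   Both function names are hypothetical.

     __attribute__((target("sse2"))) static int callee (int x) { return x + 1; }
     __attribute__((target("avx2"))) int caller (int x) { return callee (x); }

   Here the callee's ISA flags are a subset of the caller's, so inlining is
   allowed; swapping the two attributes would make ix86_can_inline_p fail.  */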
4973 /* Remember the last target of ix86_set_current_function. */
4974 static GTY(()) tree ix86_previous_fndecl;
4976 /* Invalidate ix86_previous_fndecl cache. */
4977 void
4978 ix86_reset_previous_fndecl (void)
4980 ix86_previous_fndecl = NULL_TREE;
4983 /* Establish appropriate back-end context for processing the function
4984 FNDECL. The argument might be NULL to indicate processing at top
4985 level, outside of any function scope. */
4986 static void
4987 ix86_set_current_function (tree fndecl)
4989 /* Only change the context if the function changes. This hook is called
4990 several times in the course of compiling a function, and we don't want to
4991 slow things down too much or call target_reinit when it isn't safe. */
4992 if (fndecl && fndecl != ix86_previous_fndecl)
4994 tree old_tree = (ix86_previous_fndecl
4995 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4996 : NULL_TREE);
4998 tree new_tree = (fndecl
4999 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5000 : NULL_TREE);
5002 ix86_previous_fndecl = fndecl;
5003 if (old_tree == new_tree)
5006 else if (new_tree)
5008 cl_target_option_restore (&global_options,
5009 TREE_TARGET_OPTION (new_tree));
5010 if (TREE_TARGET_GLOBALS (new_tree))
5011 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5012 else
5013 TREE_TARGET_GLOBALS (new_tree)
5014 = save_target_globals_default_opts ();
5017 else if (old_tree)
5019 new_tree = target_option_current_node;
5020 cl_target_option_restore (&global_options,
5021 TREE_TARGET_OPTION (new_tree));
5022 if (TREE_TARGET_GLOBALS (new_tree))
5023 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5024 else if (new_tree == target_option_default_node)
5025 restore_target_globals (&default_target_globals);
5026 else
5027 TREE_TARGET_GLOBALS (new_tree)
5028 = save_target_globals_default_opts ();
5034 /* Return true if this goes in large data/bss. */
5036 static bool
5037 ix86_in_large_data_p (tree exp)
5039 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5040 return false;
5042 /* Functions are never large data. */
5043 if (TREE_CODE (exp) == FUNCTION_DECL)
5044 return false;
5046 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5048 const char *section = DECL_SECTION_NAME (exp);
5049 if (strcmp (section, ".ldata") == 0
5050 || strcmp (section, ".lbss") == 0)
5051 return true;
5052 return false;
5054 else
5056 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5058 /* If this is an incomplete type with size 0, then we can't put it
5059 in data because it might be too big when completed. Also,
5060 int_size_in_bytes returns -1 if size can vary or is larger than
5061 an integer, in which case it is also safer to assume that it goes in
5062 large data. */
5063 if (size <= 0 || size > ix86_section_threshold)
5064 return true;
5067 return false;
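/* Illustrative sketch (not part of GCC): with -mcmodel=medium, objects
   larger than ix86_section_threshold (-mlarge-data-threshold, 65536 bytes
   by default) are treated as large data and land in .ldata/.lbss.  The
   array names are hypothetical.

     static char big_buffer[1 << 20];    // above the threshold: .lbss
     static char small_buffer[128];      // below the threshold: regular .bss
*/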
5070 /* Switch to the appropriate section for output of DECL.
5071 DECL is either a `VAR_DECL' node or a constant of some sort.
5072 RELOC indicates whether forming the initial value of DECL requires
5073 link-time relocations. */
5075 ATTRIBUTE_UNUSED static section *
5076 x86_64_elf_select_section (tree decl, int reloc,
5077 unsigned HOST_WIDE_INT align)
5079 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5080 && ix86_in_large_data_p (decl))
5082 const char *sname = NULL;
5083 unsigned int flags = SECTION_WRITE;
5084 switch (categorize_decl_for_section (decl, reloc))
5086 case SECCAT_DATA:
5087 sname = ".ldata";
5088 break;
5089 case SECCAT_DATA_REL:
5090 sname = ".ldata.rel";
5091 break;
5092 case SECCAT_DATA_REL_LOCAL:
5093 sname = ".ldata.rel.local";
5094 break;
5095 case SECCAT_DATA_REL_RO:
5096 sname = ".ldata.rel.ro";
5097 break;
5098 case SECCAT_DATA_REL_RO_LOCAL:
5099 sname = ".ldata.rel.ro.local";
5100 break;
5101 case SECCAT_BSS:
5102 sname = ".lbss";
5103 flags |= SECTION_BSS;
5104 break;
5105 case SECCAT_RODATA:
5106 case SECCAT_RODATA_MERGE_STR:
5107 case SECCAT_RODATA_MERGE_STR_INIT:
5108 case SECCAT_RODATA_MERGE_CONST:
5109 sname = ".lrodata";
5110 flags = 0;
5111 break;
5112 case SECCAT_SRODATA:
5113 case SECCAT_SDATA:
5114 case SECCAT_SBSS:
5115 gcc_unreachable ();
5116 case SECCAT_TEXT:
5117 case SECCAT_TDATA:
5118 case SECCAT_TBSS:
5119 /* We don't split these for the medium model. Place them into
5120 default sections and hope for the best. */
5121 break;
5123 if (sname)
5125 /* We might get called with string constants, but get_named_section
5126 doesn't like them as they are not DECLs. Also, we need to set
5127 flags in that case. */
5128 if (!DECL_P (decl))
5129 return get_section (sname, flags, NULL);
5130 return get_named_section (decl, sname, reloc);
5133 return default_elf_select_section (decl, reloc, align);
5136 /* Select a set of attributes for section NAME based on the properties
5137 of DECL and whether or not RELOC indicates that DECL's initializer
5138 might contain runtime relocations. */
5140 static unsigned int ATTRIBUTE_UNUSED
5141 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5143 unsigned int flags = default_section_type_flags (decl, name, reloc);
5145 if (decl == NULL_TREE
5146 && (strcmp (name, ".ldata.rel.ro") == 0
5147 || strcmp (name, ".ldata.rel.ro.local") == 0))
5148 flags |= SECTION_RELRO;
5150 if (strcmp (name, ".lbss") == 0
5151 || strncmp (name, ".lbss.", 6) == 0
5152 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5153 flags |= SECTION_BSS;
5155 return flags;
5158 /* Build up a unique section name, expressed as a
5159 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5160 RELOC indicates whether the initial value of EXP requires
5161 link-time relocations. */
5163 static void ATTRIBUTE_UNUSED
5164 x86_64_elf_unique_section (tree decl, int reloc)
5166 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5167 && ix86_in_large_data_p (decl))
5169 const char *prefix = NULL;
5170 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5171 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5173 switch (categorize_decl_for_section (decl, reloc))
5175 case SECCAT_DATA:
5176 case SECCAT_DATA_REL:
5177 case SECCAT_DATA_REL_LOCAL:
5178 case SECCAT_DATA_REL_RO:
5179 case SECCAT_DATA_REL_RO_LOCAL:
5180 prefix = one_only ? ".ld" : ".ldata";
5181 break;
5182 case SECCAT_BSS:
5183 prefix = one_only ? ".lb" : ".lbss";
5184 break;
5185 case SECCAT_RODATA:
5186 case SECCAT_RODATA_MERGE_STR:
5187 case SECCAT_RODATA_MERGE_STR_INIT:
5188 case SECCAT_RODATA_MERGE_CONST:
5189 prefix = one_only ? ".lr" : ".lrodata";
5190 break;
5191 case SECCAT_SRODATA:
5192 case SECCAT_SDATA:
5193 case SECCAT_SBSS:
5194 gcc_unreachable ();
5195 case SECCAT_TEXT:
5196 case SECCAT_TDATA:
5197 case SECCAT_TBSS:
5198 /* We don't split these for the medium model. Place them into
5199 default sections and hope for the best. */
5200 break;
5202 if (prefix)
5204 const char *name, *linkonce;
5205 char *string;
5207 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5208 name = targetm.strip_name_encoding (name);
5210 /* If we're using one_only, then there needs to be a .gnu.linkonce
5211 prefix to the section name. */
5212 linkonce = one_only ? ".gnu.linkonce" : "";
5214 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5216 set_decl_section_name (decl, string);
5217 return;
5220 default_unique_section (decl, reloc);
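/* Illustrative sketch (not part of GCC): when unique sections are requested
   under the medium model, a large variable named "table" (hypothetical)
   gets ".ldata.table", or ".gnu.linkonce.ld.table" when .gnu.linkonce has
   to stand in for missing COMDAT group support.

     static long table[1 << 16];   // e.g. with -mcmodel=medium -fdata-sections
*/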
5223 #ifdef COMMON_ASM_OP
5224 /* This says how to output assembler code to declare an
5225 uninitialized external linkage data object.
5227 For medium model x86-64 we need to use .largecomm opcode for
5228 large objects. */
5229 void
5230 x86_elf_aligned_common (FILE *file,
5231 const char *name, unsigned HOST_WIDE_INT size,
5232 int align)
5234 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5235 && size > (unsigned int)ix86_section_threshold)
5236 fputs (".largecomm\t", file);
5237 else
5238 fputs (COMMON_ASM_OP, file);
5239 assemble_name (file, name);
5240 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5241 size, align / BITS_PER_UNIT);
5243 #endif
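/* Illustrative sketch (not part of GCC): for a common symbol above the large
   data threshold the routine above emits .largecomm instead of .comm; the
   symbol names, sizes and alignments below are hypothetical.

     .largecomm	big_common,1048576,32
     .comm	small_common,64,8
*/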
5245 /* Utility function for targets to use in implementing
5246 ASM_OUTPUT_ALIGNED_BSS. */
5248 void
5249 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5250 const char *name, unsigned HOST_WIDE_INT size,
5251 int align)
5253 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5254 && size > (unsigned int)ix86_section_threshold)
5255 switch_to_section (get_named_section (decl, ".lbss", 0));
5256 else
5257 switch_to_section (bss_section);
5258 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5259 #ifdef ASM_DECLARE_OBJECT_NAME
5260 last_assemble_variable_decl = decl;
5261 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5262 #else
5263 /* Standard thing is just output label for the object. */
5264 ASM_OUTPUT_LABEL (file, name);
5265 #endif /* ASM_DECLARE_OBJECT_NAME */
5266 ASM_OUTPUT_SKIP (file, size ? size : 1);
5269 /* Decide whether we must probe the stack before any space allocation
5270 on this target. It's essentially TARGET_STACK_PROBE except when
5271 -fstack-check causes the stack to be already probed differently. */
5273 bool
5274 ix86_target_stack_probe (void)
5276 /* Do not probe the stack twice if static stack checking is enabled. */
5277 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5278 return false;
5280 return TARGET_STACK_PROBE;
5283 /* Decide whether we can make a sibling call to a function. DECL is the
5284 declaration of the function being targeted by the call and EXP is the
5285 CALL_EXPR representing the call. */
5287 static bool
5288 ix86_function_ok_for_sibcall (tree decl, tree exp)
5290 tree type, decl_or_type;
5291 rtx a, b;
5293 /* If we are generating position-independent code, we cannot sibcall
5294 optimize any indirect call, or a direct call to a global function,
5295 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5296 if (!TARGET_MACHO
5297 && !TARGET_64BIT
5298 && flag_pic
5299 && (!decl || !targetm.binds_local_p (decl)))
5300 return false;
5302 /* If we need to align the outgoing stack, then sibcalling would
5303 unalign the stack, which may break the called function. */
5304 if (ix86_minimum_incoming_stack_boundary (true)
5305 < PREFERRED_STACK_BOUNDARY)
5306 return false;
5308 if (decl)
5310 decl_or_type = decl;
5311 type = TREE_TYPE (decl);
5313 else
5315 /* We're looking at the CALL_EXPR, we need the type of the function. */
5316 type = CALL_EXPR_FN (exp); /* pointer expression */
5317 type = TREE_TYPE (type); /* pointer type */
5318 type = TREE_TYPE (type); /* function type */
5319 decl_or_type = type;
5322 /* Check that the return value locations are the same. Like
5323 if we are returning floats on the 80387 register stack, we cannot
5324 make a sibcall from a function that doesn't return a float to a
5325 function that does or, conversely, from a function that does return
5326 a float to a function that doesn't; the necessary stack adjustment
5327 would not be executed. This is also the place we notice
5328 differences in the return value ABI. Note that it is ok for one
5329 of the functions to have void return type as long as the return
5330 value of the other is passed in a register. */
5331 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5332 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5333 cfun->decl, false);
5334 if (STACK_REG_P (a) || STACK_REG_P (b))
5336 if (!rtx_equal_p (a, b))
5337 return false;
5339 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5341 else if (!rtx_equal_p (a, b))
5342 return false;
5344 if (TARGET_64BIT)
5346 /* The SYSV ABI has more call-clobbered registers;
5347 disallow sibcalls from MS to SYSV. */
5348 if (cfun->machine->call_abi == MS_ABI
5349 && ix86_function_type_abi (type) == SYSV_ABI)
5350 return false;
5352 else
5354 /* If this call is indirect, we'll need to be able to use a
5355 call-clobbered register for the address of the target function.
5356 Make sure that all such registers are not used for passing
5357 parameters. Note that DLLIMPORT functions are indirect. */
5358 if (!decl
5359 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5361 if (ix86_function_regparm (type, NULL) >= 3)
5363 /* ??? Need to count the actual number of registers to be used,
5364 not the possible number of registers. Fix later. */
5365 return false;
5370 /* Otherwise okay. That also includes certain types of indirect calls. */
5371 return true;
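/* Illustrative sketch (not part of GCC): in 32-bit PIC code a tail call to a
   global function is not turned into a sibcall, because the call goes
   through the PLT and needs %ebx live.  The function names are hypothetical.

     extern int global_fn (int);
     int wrapper (int x) { return global_fn (x); }   // no sibcall with -m32 -fPIC
     static int local_fn (int x) { return x + 1; }
     int wrapper2 (int x) { return local_fn (x); }    // sibcall is possible
*/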
5374 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5375 and "sseregparm" calling convention attributes;
5376 arguments as in struct attribute_spec.handler. */
5378 static tree
5379 ix86_handle_cconv_attribute (tree *node, tree name,
5380 tree args,
5381 int flags ATTRIBUTE_UNUSED,
5382 bool *no_add_attrs)
5384 if (TREE_CODE (*node) != FUNCTION_TYPE
5385 && TREE_CODE (*node) != METHOD_TYPE
5386 && TREE_CODE (*node) != FIELD_DECL
5387 && TREE_CODE (*node) != TYPE_DECL)
5389 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5390 name);
5391 *no_add_attrs = true;
5392 return NULL_TREE;
5395 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5396 if (is_attribute_p ("regparm", name))
5398 tree cst;
5400 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5402 error ("fastcall and regparm attributes are not compatible");
5405 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5407 error ("regparam and thiscall attributes are not compatible");
5410 cst = TREE_VALUE (args);
5411 if (TREE_CODE (cst) != INTEGER_CST)
5413 warning (OPT_Wattributes,
5414 "%qE attribute requires an integer constant argument",
5415 name);
5416 *no_add_attrs = true;
5418 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5420 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5421 name, REGPARM_MAX);
5422 *no_add_attrs = true;
5425 return NULL_TREE;
5428 if (TARGET_64BIT)
5430 /* Do not warn when emulating the MS ABI. */
5431 if ((TREE_CODE (*node) != FUNCTION_TYPE
5432 && TREE_CODE (*node) != METHOD_TYPE)
5433 || ix86_function_type_abi (*node) != MS_ABI)
5434 warning (OPT_Wattributes, "%qE attribute ignored",
5435 name);
5436 *no_add_attrs = true;
5437 return NULL_TREE;
5440 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5441 if (is_attribute_p ("fastcall", name))
5443 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5445 error ("fastcall and cdecl attributes are not compatible");
5447 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5449 error ("fastcall and stdcall attributes are not compatible");
5451 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5453 error ("fastcall and regparm attributes are not compatible");
5455 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5457 error ("fastcall and thiscall attributes are not compatible");
5461 /* Can combine stdcall with fastcall (redundant), regparm and
5462 sseregparm. */
5463 else if (is_attribute_p ("stdcall", name))
5465 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5467 error ("stdcall and cdecl attributes are not compatible");
5469 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5471 error ("stdcall and fastcall attributes are not compatible");
5473 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5475 error ("stdcall and thiscall attributes are not compatible");
5479 /* Can combine cdecl with regparm and sseregparm. */
5480 else if (is_attribute_p ("cdecl", name))
5482 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5484 error ("stdcall and cdecl attributes are not compatible");
5486 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5488 error ("fastcall and cdecl attributes are not compatible");
5490 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5492 error ("cdecl and thiscall attributes are not compatible");
5495 else if (is_attribute_p ("thiscall", name))
5497 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5498 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5499 name);
5500 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5502 error ("stdcall and thiscall attributes are not compatible");
5504 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5506 error ("fastcall and thiscall attributes are not compatible");
5508 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5510 error ("cdecl and thiscall attributes are not compatible");
5514 /* Can combine sseregparm with all attributes. */
5516 return NULL_TREE;
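/* Illustrative sketch (not part of GCC): the checks above reject
   incompatible combinations such as fastcall + regparm, while regparm and
   sseregparm may be combined freely.  The function names are hypothetical.

     __attribute__((regparm(3), sseregparm)) int ok_fn (int a, int b, int c);
     __attribute__((fastcall, regparm(2))) int bad_fn (int a, int b); // error
*/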
5519 /* The transactional memory builtins are implicitly regparm or fastcall
5520 depending on the ABI. Override the generic do-nothing attribute that
5521 these builtins were declared with, and replace it with one of the two
5522 attributes that we expect elsewhere. */
5524 static tree
5525 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5526 tree args ATTRIBUTE_UNUSED,
5527 int flags, bool *no_add_attrs)
5529 tree alt;
5531 /* In no case do we want to add the placeholder attribute. */
5532 *no_add_attrs = true;
5534 /* The 64-bit ABI is unchanged for transactional memory. */
5535 if (TARGET_64BIT)
5536 return NULL_TREE;
5538 /* ??? Is there a better way to validate 32-bit windows? We have
5539 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5540 if (CHECK_STACK_LIMIT > 0)
5541 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5542 else
5544 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5545 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5547 decl_attributes (node, alt, flags);
5549 return NULL_TREE;
5552 /* This function determines from TYPE the calling-convention. */
5554 unsigned int
5555 ix86_get_callcvt (const_tree type)
5557 unsigned int ret = 0;
5558 bool is_stdarg;
5559 tree attrs;
5561 if (TARGET_64BIT)
5562 return IX86_CALLCVT_CDECL;
5564 attrs = TYPE_ATTRIBUTES (type);
5565 if (attrs != NULL_TREE)
5567 if (lookup_attribute ("cdecl", attrs))
5568 ret |= IX86_CALLCVT_CDECL;
5569 else if (lookup_attribute ("stdcall", attrs))
5570 ret |= IX86_CALLCVT_STDCALL;
5571 else if (lookup_attribute ("fastcall", attrs))
5572 ret |= IX86_CALLCVT_FASTCALL;
5573 else if (lookup_attribute ("thiscall", attrs))
5574 ret |= IX86_CALLCVT_THISCALL;
5576 /* Regparm isn't allowed for thiscall and fastcall. */
5577 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5579 if (lookup_attribute ("regparm", attrs))
5580 ret |= IX86_CALLCVT_REGPARM;
5581 if (lookup_attribute ("sseregparm", attrs))
5582 ret |= IX86_CALLCVT_SSEREGPARM;
5585 if (IX86_BASE_CALLCVT(ret) != 0)
5586 return ret;
5589 is_stdarg = stdarg_p (type);
5590 if (TARGET_RTD && !is_stdarg)
5591 return IX86_CALLCVT_STDCALL | ret;
5593 if (ret != 0
5594 || is_stdarg
5595 || TREE_CODE (type) != METHOD_TYPE
5596 || ix86_function_type_abi (type) != MS_ABI)
5597 return IX86_CALLCVT_CDECL | ret;
5599 return IX86_CALLCVT_THISCALL;
5602 /* Return 0 if the attributes for two types are incompatible, 1 if they
5603 are compatible, and 2 if they are nearly compatible (which causes a
5604 warning to be generated). */
5606 static int
5607 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5609 unsigned int ccvt1, ccvt2;
5611 if (TREE_CODE (type1) != FUNCTION_TYPE
5612 && TREE_CODE (type1) != METHOD_TYPE)
5613 return 1;
5615 ccvt1 = ix86_get_callcvt (type1);
5616 ccvt2 = ix86_get_callcvt (type2);
5617 if (ccvt1 != ccvt2)
5618 return 0;
5619 if (ix86_function_regparm (type1, NULL)
5620 != ix86_function_regparm (type2, NULL))
5621 return 0;
5623 return 1;
5626 /* Return the regparm value for a function with the indicated TYPE and DECL.
5627 DECL may be NULL when calling function indirectly
5628 or considering a libcall. */
5630 static int
5631 ix86_function_regparm (const_tree type, const_tree decl)
5633 tree attr;
5634 int regparm;
5635 unsigned int ccvt;
5637 if (TARGET_64BIT)
5638 return (ix86_function_type_abi (type) == SYSV_ABI
5639 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5640 ccvt = ix86_get_callcvt (type);
5641 regparm = ix86_regparm;
5643 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5645 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5646 if (attr)
5648 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5649 return regparm;
5652 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5653 return 2;
5654 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5655 return 1;
5657 /* Use register calling convention for local functions when possible. */
5658 if (decl
5659 && TREE_CODE (decl) == FUNCTION_DECL
5660 /* Caller and callee must agree on the calling convention, so
5661 checking just the optimize flag here would mean that with
5662 __attribute__((optimize (...))) the caller could use the regparm convention
5663 and the callee not, or vice versa. Instead look at whether the callee
5664 is optimized or not. */
5665 && opt_for_fn (decl, optimize)
5666 && !(profile_flag && !flag_fentry))
5668 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5669 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5670 if (i && i->local && i->can_change_signature)
5672 int local_regparm, globals = 0, regno;
5674 /* Make sure no regparm register is taken by a
5675 fixed register variable. */
5676 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5677 if (fixed_regs[local_regparm])
5678 break;
5680 /* We don't want to use regparm(3) for nested functions as
5681 these use a static chain pointer in the third argument. */
5682 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5683 local_regparm = 2;
5685 /* In 32-bit mode save a register for the split stack. */
5686 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5687 local_regparm = 2;
5689 /* Each fixed register usage increases register pressure,
5690 so fewer registers should be used for argument passing.
5691 This functionality can be overridden by an explicit
5692 regparm value. */
5693 for (regno = AX_REG; regno <= DI_REG; regno++)
5694 if (fixed_regs[regno])
5695 globals++;
5697 local_regparm
5698 = globals < local_regparm ? local_regparm - globals : 0;
5700 if (local_regparm > regparm)
5701 regparm = local_regparm;
5705 return regparm;
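/* Illustrative sketch (not part of GCC): an explicit regparm attribute wins;
   otherwise fastcall implies 2 and thiscall implies 1, and purely local
   functions may be promoted to use up to REGPARM_MAX registers.  The
   function name is hypothetical.

     __attribute__((regparm(3))) int in_regs (int a, int b, int c);
     // on ia32: a in %eax, b in %edx, c in %ecx
*/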
5708 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5709 DFmode (2) arguments in SSE registers for a function with the
5710 indicated TYPE and DECL. DECL may be NULL when calling function
5711 indirectly or considering a libcall. Otherwise return 0. */
5713 static int
5714 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5716 gcc_assert (!TARGET_64BIT);
5718 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5719 by the sseregparm attribute. */
5720 if (TARGET_SSEREGPARM
5721 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5723 if (!TARGET_SSE)
5725 if (warn)
5727 if (decl)
5728 error ("calling %qD with attribute sseregparm without "
5729 "SSE/SSE2 enabled", decl);
5730 else
5731 error ("calling %qT with attribute sseregparm without "
5732 "SSE/SSE2 enabled", type);
5734 return 0;
5737 return 2;
5740 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5741 (and DFmode for SSE2) arguments in SSE registers. */
5742 if (decl && TARGET_SSE_MATH && optimize
5743 && !(profile_flag && !flag_fentry))
5745 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5746 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5747 if (i && i->local && i->can_change_signature)
5748 return TARGET_SSE2 ? 2 : 1;
5751 return 0;
5754 /* Return true if EAX is live at the start of the function. Used by
5755 ix86_expand_prologue to determine if we need special help before
5756 calling allocate_stack_worker. */
5758 static bool
5759 ix86_eax_live_at_start_p (void)
5761 /* Cheat. Don't bother working forward from ix86_function_regparm
5762 to the function type to whether an actual argument is located in
5763 eax. Instead just look at cfg info, which is still close enough
5764 to correct at this point. This gives false positives for broken
5765 functions that might use uninitialized data that happens to be
5766 allocated in eax, but who cares? */
5767 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5770 static bool
5771 ix86_keep_aggregate_return_pointer (tree fntype)
5773 tree attr;
5775 if (!TARGET_64BIT)
5777 attr = lookup_attribute ("callee_pop_aggregate_return",
5778 TYPE_ATTRIBUTES (fntype));
5779 if (attr)
5780 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5782 /* For 32-bit MS-ABI the default is to keep aggregate
5783 return pointer. */
5784 if (ix86_function_type_abi (fntype) == MS_ABI)
5785 return true;
5787 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5790 /* Value is the number of bytes of arguments automatically
5791 popped when returning from a subroutine call.
5792 FUNDECL is the declaration node of the function (as a tree),
5793 FUNTYPE is the data type of the function (as a tree),
5794 or for a library call it is an identifier node for the subroutine name.
5795 SIZE is the number of bytes of arguments passed on the stack.
5797 On the 80386, the RTD insn may be used to pop them if the number
5798 of args is fixed, but if the number is variable then the caller
5799 must pop them all. RTD can't be used for library calls now
5800 because the library is compiled with the Unix compiler.
5801 Use of RTD is a selectable option, since it is incompatible with
5802 standard Unix calling sequences. If the option is not selected,
5803 the caller must always pop the args.
5805 The attribute stdcall is equivalent to RTD on a per module basis. */
5807 static int
5808 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5810 unsigned int ccvt;
5812 /* None of the 64-bit ABIs pop arguments. */
5813 if (TARGET_64BIT)
5814 return 0;
5816 ccvt = ix86_get_callcvt (funtype);
5818 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5819 | IX86_CALLCVT_THISCALL)) != 0
5820 && ! stdarg_p (funtype))
5821 return size;
5823 /* Lose any fake structure return argument if it is passed on the stack. */
5824 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5825 && !ix86_keep_aggregate_return_pointer (funtype))
5827 int nregs = ix86_function_regparm (funtype, fundecl);
5828 if (nregs == 0)
5829 return GET_MODE_SIZE (Pmode);
5832 return 0;
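/* Illustrative sketch (not part of GCC): for a non-variadic stdcall function
   the callee pops its own stack arguments, so the hook above returns SIZE
   and the epilogue ends in "ret $8" for two int arguments.  The function
   name is hypothetical.

     __attribute__((stdcall)) int two_args (int a, int b);   // callee does ret $8
*/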
5835 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5837 static bool
5838 ix86_legitimate_combined_insn (rtx insn)
5840 /* Check operand constraints in case hard registers were propagated
5841 into insn pattern. This check prevents combine pass from
5842 generating insn patterns with invalid hard register operands.
5843 These invalid insns can eventually confuse reload to error out
5844 with a spill failure. See also PRs 46829 and 46843. */
5845 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5847 int i;
5849 extract_insn (insn);
5850 preprocess_constraints (insn);
5852 int n_operands = recog_data.n_operands;
5853 int n_alternatives = recog_data.n_alternatives;
5854 for (i = 0; i < n_operands; i++)
5856 rtx op = recog_data.operand[i];
5857 enum machine_mode mode = GET_MODE (op);
5858 const operand_alternative *op_alt;
5859 int offset = 0;
5860 bool win;
5861 int j;
5863 /* For pre-AVX disallow unaligned loads/stores where the
5864 instructions don't support it. */
5865 if (!TARGET_AVX
5866 && VECTOR_MODE_P (GET_MODE (op))
5867 && misaligned_operand (op, GET_MODE (op)))
5869 int min_align = get_attr_ssememalign (insn);
5870 if (min_align == 0)
5871 return false;
5874 /* A unary operator may be accepted by the predicate, but it
5875 is irrelevant for matching constraints. */
5876 if (UNARY_P (op))
5877 op = XEXP (op, 0);
5879 if (GET_CODE (op) == SUBREG)
5881 if (REG_P (SUBREG_REG (op))
5882 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5883 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5884 GET_MODE (SUBREG_REG (op)),
5885 SUBREG_BYTE (op),
5886 GET_MODE (op));
5887 op = SUBREG_REG (op);
5890 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5891 continue;
5893 op_alt = recog_op_alt;
5895 /* Operand has no constraints, anything is OK. */
5896 win = !n_alternatives;
5898 alternative_mask enabled = recog_data.enabled_alternatives;
5899 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5901 if (!TEST_BIT (enabled, j))
5902 continue;
5903 if (op_alt[i].anything_ok
5904 || (op_alt[i].matches != -1
5905 && operands_match_p
5906 (recog_data.operand[i],
5907 recog_data.operand[op_alt[i].matches]))
5908 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5910 win = true;
5911 break;
5915 if (!win)
5916 return false;
5920 return true;
5923 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5925 static unsigned HOST_WIDE_INT
5926 ix86_asan_shadow_offset (void)
5928 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5929 : HOST_WIDE_INT_C (0x7fff8000))
5930 : (HOST_WIDE_INT_1 << 29);
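/* Illustrative note (not part of GCC): AddressSanitizer maps an address to
   its shadow byte as shadow = (addr >> 3) + offset, so with the value
   returned above a Linux x86-64 access at ADDR is checked through
   (ADDR >> 3) + 0x7fff8000.  */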
5933 /* Argument support functions. */
5935 /* Return true when register may be used to pass function parameters. */
5936 bool
5937 ix86_function_arg_regno_p (int regno)
5939 int i;
5940 const int *parm_regs;
5942 if (!TARGET_64BIT)
5944 if (TARGET_MACHO)
5945 return (regno < REGPARM_MAX
5946 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5947 else
5948 return (regno < REGPARM_MAX
5949 || (TARGET_MMX && MMX_REGNO_P (regno)
5950 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5951 || (TARGET_SSE && SSE_REGNO_P (regno)
5952 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5955 if (TARGET_SSE && SSE_REGNO_P (regno)
5956 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5957 return true;
5959 /* TODO: The function should depend on current function ABI but
5960 builtins.c would need updating then. Therefore we use the
5961 default ABI. */
5963 /* RAX is used as hidden argument to va_arg functions. */
5964 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5965 return true;
5967 if (ix86_abi == MS_ABI)
5968 parm_regs = x86_64_ms_abi_int_parameter_registers;
5969 else
5970 parm_regs = x86_64_int_parameter_registers;
5971 for (i = 0; i < (ix86_abi == MS_ABI
5972 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5973 if (regno == parm_regs[i])
5974 return true;
5975 return false;
5978 /* Return true if we do not know how to pass TYPE solely in registers. */
5980 static bool
5981 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5983 if (must_pass_in_stack_var_size_or_pad (mode, type))
5984 return true;
5986 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5987 The layout_type routine is crafty and tries to trick us into passing
5988 currently unsupported vector types on the stack by using TImode. */
5989 return (!TARGET_64BIT && mode == TImode
5990 && type && TREE_CODE (type) != VECTOR_TYPE);
5993 /* Return the size, in bytes, of the area reserved for arguments passed
5994 in registers for the function represented by FNDECL, depending on the
5995 ABI format used. */
5996 int
5997 ix86_reg_parm_stack_space (const_tree fndecl)
5999 enum calling_abi call_abi = SYSV_ABI;
6000 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6001 call_abi = ix86_function_abi (fndecl);
6002 else
6003 call_abi = ix86_function_type_abi (fndecl);
6004 if (TARGET_64BIT && call_abi == MS_ABI)
6005 return 32;
6006 return 0;
6009 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6010 call ABI used. */
6011 enum calling_abi
6012 ix86_function_type_abi (const_tree fntype)
6014 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6016 enum calling_abi abi = ix86_abi;
6017 if (abi == SYSV_ABI)
6019 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6020 abi = MS_ABI;
6022 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6023 abi = SYSV_ABI;
6024 return abi;
6026 return ix86_abi;
6029 /* We add this as a workaround in order to use libc_has_function
6030 hook in i386.md. */
6031 bool
6032 ix86_libc_has_function (enum function_class fn_class)
6034 return targetm.libc_has_function (fn_class);
6037 static bool
6038 ix86_function_ms_hook_prologue (const_tree fn)
6040 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6042 if (decl_function_context (fn) != NULL_TREE)
6043 error_at (DECL_SOURCE_LOCATION (fn),
6044 "ms_hook_prologue is not compatible with nested function");
6045 else
6046 return true;
6048 return false;
6051 static enum calling_abi
6052 ix86_function_abi (const_tree fndecl)
6054 if (! fndecl)
6055 return ix86_abi;
6056 return ix86_function_type_abi (TREE_TYPE (fndecl));
6059 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the
6060 call ABI used. */
6061 enum calling_abi
6062 ix86_cfun_abi (void)
6064 if (! cfun)
6065 return ix86_abi;
6066 return cfun->machine->call_abi;
6069 /* Write the extra assembler code needed to declare a function properly. */
6071 void
6072 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6073 tree decl)
6075 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6077 if (is_ms_hook)
6079 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6080 unsigned int filler_cc = 0xcccccccc;
6082 for (i = 0; i < filler_count; i += 4)
6083 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6086 #ifdef SUBTARGET_ASM_UNWIND_INIT
6087 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6088 #endif
6090 ASM_OUTPUT_LABEL (asm_out_file, fname);
6092 /* Output magic byte marker, if hot-patch attribute is set. */
6093 if (is_ms_hook)
6095 if (TARGET_64BIT)
6097 /* leaq [%rsp + 0], %rsp */
6098 asm_fprintf (asm_out_file, ASM_BYTE
6099 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6101 else
6103 /* movl.s %edi, %edi
6104 push %ebp
6105 movl.s %esp, %ebp */
6106 asm_fprintf (asm_out_file, ASM_BYTE
6107 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6112 /* regclass.c */
6113 extern void init_regs (void);
6115 /* Implementation of the call ABI switching target hook. The call
6116 register sets specific to FNDECL are selected. See also
6117 ix86_conditional_register_usage for more details. */
6118 void
6119 ix86_call_abi_override (const_tree fndecl)
6121 if (fndecl == NULL_TREE)
6122 cfun->machine->call_abi = ix86_abi;
6123 else
6124 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6127 /* 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6128 expensive re-initialization of init_regs each time we switch function context
6129 since this is needed only during RTL expansion. */
6130 static void
6131 ix86_maybe_switch_abi (void)
6133 if (TARGET_64BIT &&
6134 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6135 reinit_regs ();
6138 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6139 for a call to a function whose data type is FNTYPE.
6140 For a library call, FNTYPE is 0. */
6142 void
6143 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6144 tree fntype, /* tree ptr for function decl */
6145 rtx libname, /* SYMBOL_REF of library name or 0 */
6146 tree fndecl,
6147 int caller)
6149 struct cgraph_local_info *i;
6151 memset (cum, 0, sizeof (*cum));
6153 if (fndecl)
6155 i = cgraph_local_info (fndecl);
6156 cum->call_abi = ix86_function_abi (fndecl);
6158 else
6160 i = NULL;
6161 cum->call_abi = ix86_function_type_abi (fntype);
6164 cum->caller = caller;
6166 /* Set up the number of registers to use for passing arguments. */
6167 cum->nregs = ix86_regparm;
6168 if (TARGET_64BIT)
6170 cum->nregs = (cum->call_abi == SYSV_ABI
6171 ? X86_64_REGPARM_MAX
6172 : X86_64_MS_REGPARM_MAX);
6174 if (TARGET_SSE)
6176 cum->sse_nregs = SSE_REGPARM_MAX;
6177 if (TARGET_64BIT)
6179 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6180 ? X86_64_SSE_REGPARM_MAX
6181 : X86_64_MS_SSE_REGPARM_MAX);
6184 if (TARGET_MMX)
6185 cum->mmx_nregs = MMX_REGPARM_MAX;
6186 cum->warn_avx512f = true;
6187 cum->warn_avx = true;
6188 cum->warn_sse = true;
6189 cum->warn_mmx = true;
6191 /* Because the type might mismatch between caller and callee, we need to
6192 use the actual type of the function for local calls.
6193 FIXME: cgraph_analyze can be told to actually record if function uses
6194 va_start so for local functions maybe_vaarg can be made aggressive
6195 helping K&R code.
6196 FIXME: once the type system is fixed, we won't need this code anymore. */
6197 if (i && i->local && i->can_change_signature)
6198 fntype = TREE_TYPE (fndecl);
6199 cum->maybe_vaarg = (fntype
6200 ? (!prototype_p (fntype) || stdarg_p (fntype))
6201 : !libname);
6203 if (!TARGET_64BIT)
6205 /* If there are variable arguments, then we won't pass anything
6206 in registers in 32-bit mode. */
6207 if (stdarg_p (fntype))
6209 cum->nregs = 0;
6210 cum->sse_nregs = 0;
6211 cum->mmx_nregs = 0;
6212 cum->warn_avx512f = false;
6213 cum->warn_avx = false;
6214 cum->warn_sse = false;
6215 cum->warn_mmx = false;
6216 return;
6219 /* Use ecx and edx registers if function has fastcall attribute,
6220 else look for regparm information. */
6221 if (fntype)
6223 unsigned int ccvt = ix86_get_callcvt (fntype);
6224 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6226 cum->nregs = 1;
6227 cum->fastcall = 1; /* Same first register as in fastcall. */
6229 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6231 cum->nregs = 2;
6232 cum->fastcall = 1;
6234 else
6235 cum->nregs = ix86_function_regparm (fntype, fndecl);
6238 /* Set up the number of SSE registers used for passing SFmode
6239 and DFmode arguments. Warn for mismatching ABI. */
6240 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6244 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6245 But in the case of vector types, it is some vector mode.
6247 When we have only some of our vector isa extensions enabled, then there
6248 are some modes for which vector_mode_supported_p is false. For these
6249 modes, the generic vector support in gcc will choose some non-vector mode
6250 in order to implement the type. By computing the natural mode, we'll
6251 select the proper ABI location for the operand and not depend on whatever
6252 the middle-end decides to do with these vector types.
6254 The middle-end can't deal with vector types > 16 bytes. In this
6255 case, we return the original mode and warn ABI change if CUM isn't
6256 NULL.
6258 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6259 available for the function return value. */
6261 static enum machine_mode
6262 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6263 bool in_return)
6265 enum machine_mode mode = TYPE_MODE (type);
6267 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6269 HOST_WIDE_INT size = int_size_in_bytes (type);
6270 if ((size == 8 || size == 16 || size == 32 || size == 64)
6271 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6272 && TYPE_VECTOR_SUBPARTS (type) > 1)
6274 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6276 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6277 mode = MIN_MODE_VECTOR_FLOAT;
6278 else
6279 mode = MIN_MODE_VECTOR_INT;
6281 /* Get the mode which has this inner mode and number of units. */
6282 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6283 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6284 && GET_MODE_INNER (mode) == innermode)
6286 if (size == 64 && !TARGET_AVX512F)
6288 static bool warnedavx512f;
6289 static bool warnedavx512f_ret;
6291 if (cum && cum->warn_avx512f && !warnedavx512f)
6293 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6294 "without AVX512F enabled changes the ABI"))
6295 warnedavx512f = true;
6297 else if (in_return && !warnedavx512f_ret)
6299 if (warning (OPT_Wpsabi, "AVX512F vector return "
6300 "without AVX512F enabled changes the ABI"))
6301 warnedavx512f_ret = true;
6304 return TYPE_MODE (type);
6306 else if (size == 32 && !TARGET_AVX)
6308 static bool warnedavx;
6309 static bool warnedavx_ret;
6311 if (cum && cum->warn_avx && !warnedavx)
6313 if (warning (OPT_Wpsabi, "AVX vector argument "
6314 "without AVX enabled changes the ABI"))
6315 warnedavx = true;
6317 else if (in_return && !warnedavx_ret)
6319 if (warning (OPT_Wpsabi, "AVX vector return "
6320 "without AVX enabled changes the ABI"))
6321 warnedavx_ret = true;
6324 return TYPE_MODE (type);
6326 else if (((size == 8 && TARGET_64BIT) || size == 16)
6327 && !TARGET_SSE)
6329 static bool warnedsse;
6330 static bool warnedsse_ret;
6332 if (cum && cum->warn_sse && !warnedsse)
6334 if (warning (OPT_Wpsabi, "SSE vector argument "
6335 "without SSE enabled changes the ABI"))
6336 warnedsse = true;
6338 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6340 if (warning (OPT_Wpsabi, "SSE vector return "
6341 "without SSE enabled changes the ABI"))
6342 warnedsse_ret = true;
6345 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6347 static bool warnedmmx;
6348 static bool warnedmmx_ret;
6350 if (cum && cum->warn_mmx && !warnedmmx)
6352 if (warning (OPT_Wpsabi, "MMX vector argument "
6353 "without MMX enabled changes the ABI"))
6354 warnedmmx = true;
6356 else if (in_return && !warnedmmx_ret)
6358 if (warning (OPT_Wpsabi, "MMX vector return "
6359 "without MMX enabled changes the ABI"))
6360 warnedmmx_ret = true;
6363 return mode;
6366 gcc_unreachable ();
6370 return mode;
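/* Illustrative sketch (not part of GCC): passing a 32-byte vector while AVX
   is disabled makes the code above fall back to TYPE_MODE and emit the
   -Wpsabi note, since the register ABI for the argument changes.  The
   typedef and function name are hypothetical.

     typedef float v8sf __attribute__((vector_size (32)));
     v8sf passthrough (v8sf x) { return x; }   // warns unless -mavx is on
*/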
6373 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6374 this may not agree with the mode that the type system has chosen for the
6375 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6376 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6378 static rtx
6379 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6380 unsigned int regno)
6382 rtx tmp;
6384 if (orig_mode != BLKmode)
6385 tmp = gen_rtx_REG (orig_mode, regno);
6386 else
6388 tmp = gen_rtx_REG (mode, regno);
6389 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6390 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6393 return tmp;
6396 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6397 of this code is to classify each eightbyte of an incoming argument by register
6398 class and assign registers accordingly. */
6400 /* Return the union class of CLASS1 and CLASS2.
6401 See the x86-64 PS ABI for details. */
6403 static enum x86_64_reg_class
6404 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6406 /* Rule #1: If both classes are equal, this is the resulting class. */
6407 if (class1 == class2)
6408 return class1;
6410 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6411 the other class. */
6412 if (class1 == X86_64_NO_CLASS)
6413 return class2;
6414 if (class2 == X86_64_NO_CLASS)
6415 return class1;
6417 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6418 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6419 return X86_64_MEMORY_CLASS;
6421 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6422 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6423 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6424 return X86_64_INTEGERSI_CLASS;
6425 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6426 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6427 return X86_64_INTEGER_CLASS;
6429 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6430 MEMORY is used. */
6431 if (class1 == X86_64_X87_CLASS
6432 || class1 == X86_64_X87UP_CLASS
6433 || class1 == X86_64_COMPLEX_X87_CLASS
6434 || class2 == X86_64_X87_CLASS
6435 || class2 == X86_64_X87UP_CLASS
6436 || class2 == X86_64_COMPLEX_X87_CLASS)
6437 return X86_64_MEMORY_CLASS;
6439 /* Rule #6: Otherwise class SSE is used. */
6440 return X86_64_SSE_CLASS;
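/* For example, for

       struct s { int i; float f; };

   the single eightbyte classifies the int member as
   X86_64_INTEGERSI_CLASS and the float member (at bit offset 32) as
   X86_64_SSE_CLASS; rule #4 then merges the pair to
   X86_64_INTEGER_CLASS, so the whole struct travels in one
   general-purpose register.  */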
6443 /* Classify the argument of type TYPE and mode MODE.
6444 CLASSES will be filled by the register class used to pass each word
6445 of the operand. The number of words is returned. In case the parameter
6446 should be passed in memory, 0 is returned. As a special case for zero
6447 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6449 BIT_OFFSET is used internally for handling records and specifies the
6450 offset in bits modulo 512 to avoid overflow cases.
6452 See the x86-64 PS ABI for details.
6455 static int
6456 classify_argument (enum machine_mode mode, const_tree type,
6457 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6459 HOST_WIDE_INT bytes =
6460 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6461 int words
6462 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6464 /* Variable sized entities are always passed/returned in memory. */
6465 if (bytes < 0)
6466 return 0;
6468 if (mode != VOIDmode
6469 && targetm.calls.must_pass_in_stack (mode, type))
6470 return 0;
6472 if (type && AGGREGATE_TYPE_P (type))
6474 int i;
6475 tree field;
6476 enum x86_64_reg_class subclasses[MAX_CLASSES];
6478 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6479 if (bytes > 64)
6480 return 0;
6482 for (i = 0; i < words; i++)
6483 classes[i] = X86_64_NO_CLASS;
6485 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6486 signal the memory class, so handle it as a special case.
6487 if (!words)
6489 classes[0] = X86_64_NO_CLASS;
6490 return 1;
6493 /* Classify each field of record and merge classes. */
6494 switch (TREE_CODE (type))
6496 case RECORD_TYPE:
6497 /* And now merge the fields of structure. */
6498 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6500 if (TREE_CODE (field) == FIELD_DECL)
6502 int num;
6504 if (TREE_TYPE (field) == error_mark_node)
6505 continue;
6507 /* Bitfields are always classified as integer. Handle them
6508 early, since later code would consider them to be
6509 misaligned integers. */
6510 if (DECL_BIT_FIELD (field))
6512 for (i = (int_bit_position (field)
6513 + (bit_offset % 64)) / 8 / 8;
6514 i < ((int_bit_position (field) + (bit_offset % 64))
6515 + tree_to_shwi (DECL_SIZE (field))
6516 + 63) / 8 / 8; i++)
6517 classes[i] =
6518 merge_classes (X86_64_INTEGER_CLASS,
6519 classes[i]);
6521 else
6523 int pos;
6525 type = TREE_TYPE (field);
6527 /* Flexible array member is ignored. */
6528 if (TYPE_MODE (type) == BLKmode
6529 && TREE_CODE (type) == ARRAY_TYPE
6530 && TYPE_SIZE (type) == NULL_TREE
6531 && TYPE_DOMAIN (type) != NULL_TREE
6532 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6533 == NULL_TREE))
6535 static bool warned;
6537 if (!warned && warn_psabi)
6539 warned = true;
6540 inform (input_location,
6541 "the ABI of passing struct with"
6542 " a flexible array member has"
6543 " changed in GCC 4.4");
6545 continue;
6547 num = classify_argument (TYPE_MODE (type), type,
6548 subclasses,
6549 (int_bit_position (field)
6550 + bit_offset) % 512);
6551 if (!num)
6552 return 0;
6553 pos = (int_bit_position (field)
6554 + (bit_offset % 64)) / 8 / 8;
6555 for (i = 0; i < num && (i + pos) < words; i++)
6556 classes[i + pos] =
6557 merge_classes (subclasses[i], classes[i + pos]);
6561 break;
6563 case ARRAY_TYPE:
6564 /* Arrays are handled as small records. */
6566 int num;
6567 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6568 TREE_TYPE (type), subclasses, bit_offset);
6569 if (!num)
6570 return 0;
6572 /* The partial classes are now full classes. */
6573 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6574 subclasses[0] = X86_64_SSE_CLASS;
6575 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6576 && !((bit_offset % 64) == 0 && bytes == 4))
6577 subclasses[0] = X86_64_INTEGER_CLASS;
6579 for (i = 0; i < words; i++)
6580 classes[i] = subclasses[i % num];
6582 break;
6584 case UNION_TYPE:
6585 case QUAL_UNION_TYPE:
6586 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6588 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6590 if (TREE_CODE (field) == FIELD_DECL)
6592 int num;
6594 if (TREE_TYPE (field) == error_mark_node)
6595 continue;
6597 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6598 TREE_TYPE (field), subclasses,
6599 bit_offset);
6600 if (!num)
6601 return 0;
6602 for (i = 0; i < num && i < words; i++)
6603 classes[i] = merge_classes (subclasses[i], classes[i]);
6606 break;
6608 default:
6609 gcc_unreachable ();
6612 if (words > 2)
6614 /* When the size exceeds 16 bytes, everything should be passed
6615 in memory unless the first eightbyte is
6616 X86_64_SSE_CLASS and all remaining eightbytes are
6617 X86_64_SSEUP_CLASS. */
6618 if (classes[0] != X86_64_SSE_CLASS)
6619 return 0;
6621 for (i = 1; i < words; i++)
6622 if (classes[i] != X86_64_SSEUP_CLASS)
6623 return 0;
6626 /* Final merger cleanup. */
6627 for (i = 0; i < words; i++)
6629 /* If one class is MEMORY, everything should be passed in
6630 memory. */
6631 if (classes[i] == X86_64_MEMORY_CLASS)
6632 return 0;
6634 /* The X86_64_SSEUP_CLASS should always be preceded by
6635 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6636 if (classes[i] == X86_64_SSEUP_CLASS
6637 && classes[i - 1] != X86_64_SSE_CLASS
6638 && classes[i - 1] != X86_64_SSEUP_CLASS)
6640 /* The first one should never be X86_64_SSEUP_CLASS. */
6641 gcc_assert (i != 0);
6642 classes[i] = X86_64_SSE_CLASS;
6645 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6646 everything should be passed in memory. */
6647 if (classes[i] == X86_64_X87UP_CLASS
6648 && (classes[i - 1] != X86_64_X87_CLASS))
6650 static bool warned;
6652 /* The first one should never be X86_64_X87UP_CLASS. */
6653 gcc_assert (i != 0);
6654 if (!warned && warn_psabi)
6656 warned = true;
6657 inform (input_location,
6658 "the ABI of passing union with long double"
6659 " has changed in GCC 4.4");
6661 return 0;
6664 return words;
6667 /* Compute the alignment needed. We align all types to their natural boundaries,
6668 with the exception of XFmode, which is aligned to 64 bits. */
6669 if (mode != VOIDmode && mode != BLKmode)
6671 int mode_alignment = GET_MODE_BITSIZE (mode);
6673 if (mode == XFmode)
6674 mode_alignment = 128;
6675 else if (mode == XCmode)
6676 mode_alignment = 256;
6677 if (COMPLEX_MODE_P (mode))
6678 mode_alignment /= 2;
6679 /* Misaligned fields are always returned in memory. */
6680 if (bit_offset % mode_alignment)
6681 return 0;
6684 /* For V1xx modes, just use the base mode. */
6685 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6686 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6687 mode = GET_MODE_INNER (mode);
6689 /* Classification of atomic types. */
6690 switch (mode)
6692 case SDmode:
6693 case DDmode:
6694 classes[0] = X86_64_SSE_CLASS;
6695 return 1;
6696 case TDmode:
6697 classes[0] = X86_64_SSE_CLASS;
6698 classes[1] = X86_64_SSEUP_CLASS;
6699 return 2;
6700 case DImode:
6701 case SImode:
6702 case HImode:
6703 case QImode:
6704 case CSImode:
6705 case CHImode:
6706 case CQImode:
6708 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6710 /* Analyze last 128 bits only. */
6711 size = (size - 1) & 0x7f;
6713 if (size < 32)
6715 classes[0] = X86_64_INTEGERSI_CLASS;
6716 return 1;
6718 else if (size < 64)
6720 classes[0] = X86_64_INTEGER_CLASS;
6721 return 1;
6723 else if (size < 64+32)
6725 classes[0] = X86_64_INTEGER_CLASS;
6726 classes[1] = X86_64_INTEGERSI_CLASS;
6727 return 2;
6729 else if (size < 64+64)
6731 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6732 return 2;
6734 else
6735 gcc_unreachable ();
6737 case CDImode:
6738 case TImode:
6739 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6740 return 2;
6741 case COImode:
6742 case OImode:
6743 /* OImode shouldn't be used directly. */
6744 gcc_unreachable ();
6745 case CTImode:
6746 return 0;
6747 case SFmode:
6748 if (!(bit_offset % 64))
6749 classes[0] = X86_64_SSESF_CLASS;
6750 else
6751 classes[0] = X86_64_SSE_CLASS;
6752 return 1;
6753 case DFmode:
6754 classes[0] = X86_64_SSEDF_CLASS;
6755 return 1;
6756 case XFmode:
6757 classes[0] = X86_64_X87_CLASS;
6758 classes[1] = X86_64_X87UP_CLASS;
6759 return 2;
6760 case TFmode:
6761 classes[0] = X86_64_SSE_CLASS;
6762 classes[1] = X86_64_SSEUP_CLASS;
6763 return 2;
6764 case SCmode:
6765 classes[0] = X86_64_SSE_CLASS;
6766 if (!(bit_offset % 64))
6767 return 1;
6768 else
6770 static bool warned;
6772 if (!warned && warn_psabi)
6774 warned = true;
6775 inform (input_location,
6776 "the ABI of passing structure with complex float"
6777 " member has changed in GCC 4.4");
6779 classes[1] = X86_64_SSESF_CLASS;
6780 return 2;
6782 case DCmode:
6783 classes[0] = X86_64_SSEDF_CLASS;
6784 classes[1] = X86_64_SSEDF_CLASS;
6785 return 2;
6786 case XCmode:
6787 classes[0] = X86_64_COMPLEX_X87_CLASS;
6788 return 1;
6789 case TCmode:
6790 /* This mode is larger than 16 bytes. */
6791 return 0;
6792 case V8SFmode:
6793 case V8SImode:
6794 case V32QImode:
6795 case V16HImode:
6796 case V4DFmode:
6797 case V4DImode:
6798 classes[0] = X86_64_SSE_CLASS;
6799 classes[1] = X86_64_SSEUP_CLASS;
6800 classes[2] = X86_64_SSEUP_CLASS;
6801 classes[3] = X86_64_SSEUP_CLASS;
6802 return 4;
6803 case V8DFmode:
6804 case V16SFmode:
6805 case V8DImode:
6806 case V16SImode:
6807 case V32HImode:
6808 case V64QImode:
6809 classes[0] = X86_64_SSE_CLASS;
6810 classes[1] = X86_64_SSEUP_CLASS;
6811 classes[2] = X86_64_SSEUP_CLASS;
6812 classes[3] = X86_64_SSEUP_CLASS;
6813 classes[4] = X86_64_SSEUP_CLASS;
6814 classes[5] = X86_64_SSEUP_CLASS;
6815 classes[6] = X86_64_SSEUP_CLASS;
6816 classes[7] = X86_64_SSEUP_CLASS;
6817 return 8;
6818 case V4SFmode:
6819 case V4SImode:
6820 case V16QImode:
6821 case V8HImode:
6822 case V2DFmode:
6823 case V2DImode:
6824 classes[0] = X86_64_SSE_CLASS;
6825 classes[1] = X86_64_SSEUP_CLASS;
6826 return 2;
6827 case V1TImode:
6828 case V1DImode:
6829 case V2SFmode:
6830 case V2SImode:
6831 case V4HImode:
6832 case V8QImode:
6833 classes[0] = X86_64_SSE_CLASS;
6834 return 1;
6835 case BLKmode:
6836 case VOIDmode:
6837 return 0;
6838 default:
6839 gcc_assert (VECTOR_MODE_P (mode));
6841 if (bytes > 16)
6842 return 0;
6844 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6846 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6847 classes[0] = X86_64_INTEGERSI_CLASS;
6848 else
6849 classes[0] = X86_64_INTEGER_CLASS;
6850 classes[1] = X86_64_INTEGER_CLASS;
6851 return 1 + (bytes > 8);
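/* Some representative classifications produced above (SysV x86-64):

       struct { double a, b; }      -> SSEDF, SSEDF    two SSE registers
       struct { long l; double d; } -> INTEGER, SSEDF  one GPR + one SSE reg
       long double (XFmode)         -> X87, X87UP      memory when passed
                                                       as an argument
       32-byte vector (e.g. __m256) -> SSE, 3x SSEUP   one YMM register
       struct of five ints          -> 0               memory, via the
                                                       words > 2 check  */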
6855 /* Examine the argument and set the number of registers required in each
6856 class. Return true iff the parameter should be passed in memory. */
6858 static bool
6859 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6860 int *int_nregs, int *sse_nregs)
6862 enum x86_64_reg_class regclass[MAX_CLASSES];
6863 int n = classify_argument (mode, type, regclass, 0);
6865 *int_nregs = 0;
6866 *sse_nregs = 0;
6868 if (!n)
6869 return true;
6870 for (n--; n >= 0; n--)
6871 switch (regclass[n])
6873 case X86_64_INTEGER_CLASS:
6874 case X86_64_INTEGERSI_CLASS:
6875 (*int_nregs)++;
6876 break;
6877 case X86_64_SSE_CLASS:
6878 case X86_64_SSESF_CLASS:
6879 case X86_64_SSEDF_CLASS:
6880 (*sse_nregs)++;
6881 break;
6882 case X86_64_NO_CLASS:
6883 case X86_64_SSEUP_CLASS:
6884 break;
6885 case X86_64_X87_CLASS:
6886 case X86_64_X87UP_CLASS:
6887 case X86_64_COMPLEX_X87_CLASS:
6888 if (!in_return)
6889 return true;
6890 break;
6891 case X86_64_MEMORY_CLASS:
6892 gcc_unreachable ();
6895 return false;
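/* For example, for struct { long l; double d; } this sets
   *int_nregs = 1 and *sse_nregs = 1 and returns false, i.e. the value
   fits in one general-purpose plus one SSE register; a classification
   containing X86_64_X87_CLASS (e.g. long double) makes an argument go
   to memory by returning true.  */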
6898 /* Construct container for the argument used by GCC interface. See
6899 FUNCTION_ARG for the detailed description. */
6901 static rtx
6902 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6903 const_tree type, int in_return, int nintregs, int nsseregs,
6904 const int *intreg, int sse_regno)
6906 /* The following variables hold the static issued_error state. */
6907 static bool issued_sse_arg_error;
6908 static bool issued_sse_ret_error;
6909 static bool issued_x87_ret_error;
6911 enum machine_mode tmpmode;
6912 int bytes =
6913 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6914 enum x86_64_reg_class regclass[MAX_CLASSES];
6915 int n;
6916 int i;
6917 int nexps = 0;
6918 int needed_sseregs, needed_intregs;
6919 rtx exp[MAX_CLASSES];
6920 rtx ret;
6922 n = classify_argument (mode, type, regclass, 0);
6923 if (!n)
6924 return NULL;
6925 if (examine_argument (mode, type, in_return, &needed_intregs,
6926 &needed_sseregs))
6927 return NULL;
6928 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6929 return NULL;
6931 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6932 some less clueful developer tries to use floating-point anyway. */
6933 if (needed_sseregs && !TARGET_SSE)
6935 if (in_return)
6937 if (!issued_sse_ret_error)
6939 error ("SSE register return with SSE disabled");
6940 issued_sse_ret_error = true;
6943 else if (!issued_sse_arg_error)
6945 error ("SSE register argument with SSE disabled");
6946 issued_sse_arg_error = true;
6948 return NULL;
6951 /* Likewise, error if the ABI requires us to return values in the
6952 x87 registers and the user specified -mno-80387. */
6953 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6954 for (i = 0; i < n; i++)
6955 if (regclass[i] == X86_64_X87_CLASS
6956 || regclass[i] == X86_64_X87UP_CLASS
6957 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6959 if (!issued_x87_ret_error)
6961 error ("x87 register return with x87 disabled");
6962 issued_x87_ret_error = true;
6964 return NULL;
6967 /* First construct simple cases. Avoid SCmode, since we want to use
6968 a single register to pass this type. */
6969 if (n == 1 && mode != SCmode)
6970 switch (regclass[0])
6972 case X86_64_INTEGER_CLASS:
6973 case X86_64_INTEGERSI_CLASS:
6974 return gen_rtx_REG (mode, intreg[0]);
6975 case X86_64_SSE_CLASS:
6976 case X86_64_SSESF_CLASS:
6977 case X86_64_SSEDF_CLASS:
6978 if (mode != BLKmode)
6979 return gen_reg_or_parallel (mode, orig_mode,
6980 SSE_REGNO (sse_regno));
6981 break;
6982 case X86_64_X87_CLASS:
6983 case X86_64_COMPLEX_X87_CLASS:
6984 return gen_rtx_REG (mode, FIRST_STACK_REG);
6985 case X86_64_NO_CLASS:
6986 /* Zero sized array, struct or class. */
6987 return NULL;
6988 default:
6989 gcc_unreachable ();
6991 if (n == 2
6992 && regclass[0] == X86_64_SSE_CLASS
6993 && regclass[1] == X86_64_SSEUP_CLASS
6994 && mode != BLKmode)
6995 return gen_reg_or_parallel (mode, orig_mode,
6996 SSE_REGNO (sse_regno));
6997 if (n == 4
6998 && regclass[0] == X86_64_SSE_CLASS
6999 && regclass[1] == X86_64_SSEUP_CLASS
7000 && regclass[2] == X86_64_SSEUP_CLASS
7001 && regclass[3] == X86_64_SSEUP_CLASS
7002 && mode != BLKmode)
7003 return gen_reg_or_parallel (mode, orig_mode,
7004 SSE_REGNO (sse_regno));
7005 if (n == 8
7006 && regclass[0] == X86_64_SSE_CLASS
7007 && regclass[1] == X86_64_SSEUP_CLASS
7008 && regclass[2] == X86_64_SSEUP_CLASS
7009 && regclass[3] == X86_64_SSEUP_CLASS
7010 && regclass[4] == X86_64_SSEUP_CLASS
7011 && regclass[5] == X86_64_SSEUP_CLASS
7012 && regclass[6] == X86_64_SSEUP_CLASS
7013 && regclass[7] == X86_64_SSEUP_CLASS
7014 && mode != BLKmode)
7015 return gen_reg_or_parallel (mode, orig_mode,
7016 SSE_REGNO (sse_regno));
7017 if (n == 2
7018 && regclass[0] == X86_64_X87_CLASS
7019 && regclass[1] == X86_64_X87UP_CLASS)
7020 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7022 if (n == 2
7023 && regclass[0] == X86_64_INTEGER_CLASS
7024 && regclass[1] == X86_64_INTEGER_CLASS
7025 && (mode == CDImode || mode == TImode)
7026 && intreg[0] + 1 == intreg[1])
7027 return gen_rtx_REG (mode, intreg[0]);
7029 /* Otherwise figure out the entries of the PARALLEL. */
7030 for (i = 0; i < n; i++)
7032 int pos;
7034 switch (regclass[i])
7036 case X86_64_NO_CLASS:
7037 break;
7038 case X86_64_INTEGER_CLASS:
7039 case X86_64_INTEGERSI_CLASS:
7040 /* Merge TImodes on aligned occasions here too. */
7041 if (i * 8 + 8 > bytes)
7042 tmpmode
7043 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7044 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7045 tmpmode = SImode;
7046 else
7047 tmpmode = DImode;
7048 /* We've requested 24 bytes for which we
7049 don't have a mode. Use DImode. */
7050 if (tmpmode == BLKmode)
7051 tmpmode = DImode;
7052 exp [nexps++]
7053 = gen_rtx_EXPR_LIST (VOIDmode,
7054 gen_rtx_REG (tmpmode, *intreg),
7055 GEN_INT (i*8));
7056 intreg++;
7057 break;
7058 case X86_64_SSESF_CLASS:
7059 exp [nexps++]
7060 = gen_rtx_EXPR_LIST (VOIDmode,
7061 gen_rtx_REG (SFmode,
7062 SSE_REGNO (sse_regno)),
7063 GEN_INT (i*8));
7064 sse_regno++;
7065 break;
7066 case X86_64_SSEDF_CLASS:
7067 exp [nexps++]
7068 = gen_rtx_EXPR_LIST (VOIDmode,
7069 gen_rtx_REG (DFmode,
7070 SSE_REGNO (sse_regno)),
7071 GEN_INT (i*8));
7072 sse_regno++;
7073 break;
7074 case X86_64_SSE_CLASS:
7075 pos = i;
7076 switch (n)
7078 case 1:
7079 tmpmode = DImode;
7080 break;
7081 case 2:
7082 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7084 tmpmode = TImode;
7085 i++;
7087 else
7088 tmpmode = DImode;
7089 break;
7090 case 4:
7091 gcc_assert (i == 0
7092 && regclass[1] == X86_64_SSEUP_CLASS
7093 && regclass[2] == X86_64_SSEUP_CLASS
7094 && regclass[3] == X86_64_SSEUP_CLASS);
7095 tmpmode = OImode;
7096 i += 3;
7097 break;
7098 case 8:
7099 gcc_assert (i == 0
7100 && regclass[1] == X86_64_SSEUP_CLASS
7101 && regclass[2] == X86_64_SSEUP_CLASS
7102 && regclass[3] == X86_64_SSEUP_CLASS
7103 && regclass[4] == X86_64_SSEUP_CLASS
7104 && regclass[5] == X86_64_SSEUP_CLASS
7105 && regclass[6] == X86_64_SSEUP_CLASS
7106 && regclass[7] == X86_64_SSEUP_CLASS);
7107 tmpmode = XImode;
7108 i += 7;
7109 break;
7110 default:
7111 gcc_unreachable ();
7113 exp [nexps++]
7114 = gen_rtx_EXPR_LIST (VOIDmode,
7115 gen_rtx_REG (tmpmode,
7116 SSE_REGNO (sse_regno)),
7117 GEN_INT (pos*8));
7118 sse_regno++;
7119 break;
7120 default:
7121 gcc_unreachable ();
7125 /* Empty aligned struct, union or class. */
7126 if (nexps == 0)
7127 return NULL;
7129 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7130 for (i = 0; i < nexps; i++)
7131 XVECEXP (ret, 0, i) = exp [i];
7132 return ret;
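/* For example, for struct { double d; long l; } with %xmm0 and %rdi
   still free, the PARALLEL built above is roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di)   (const_int 8))])

   i.e. bytes 0..7 travel in %xmm0 and bytes 8..15 in %rdi.  */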
7135 /* Update the data in CUM to advance over an argument of mode MODE
7136 and data type TYPE. (TYPE is null for libcalls where that information
7137 may not be available.) */
7139 static void
7140 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7141 const_tree type, HOST_WIDE_INT bytes,
7142 HOST_WIDE_INT words)
7144 switch (mode)
7146 default:
7147 break;
7149 case BLKmode:
7150 if (bytes < 0)
7151 break;
7152 /* FALLTHRU */
7154 case DImode:
7155 case SImode:
7156 case HImode:
7157 case QImode:
7158 cum->words += words;
7159 cum->nregs -= words;
7160 cum->regno += words;
7162 if (cum->nregs <= 0)
7164 cum->nregs = 0;
7165 cum->regno = 0;
7167 break;
7169 case OImode:
7170 /* OImode shouldn't be used directly. */
7171 gcc_unreachable ();
7173 case DFmode:
7174 if (cum->float_in_sse < 2)
7175 break;
7176 case SFmode:
7177 if (cum->float_in_sse < 1)
7178 break;
7179 /* FALLTHRU */
7181 case V8SFmode:
7182 case V8SImode:
7183 case V64QImode:
7184 case V32HImode:
7185 case V16SImode:
7186 case V8DImode:
7187 case V16SFmode:
7188 case V8DFmode:
7189 case V32QImode:
7190 case V16HImode:
7191 case V4DFmode:
7192 case V4DImode:
7193 case TImode:
7194 case V16QImode:
7195 case V8HImode:
7196 case V4SImode:
7197 case V2DImode:
7198 case V4SFmode:
7199 case V2DFmode:
7200 if (!type || !AGGREGATE_TYPE_P (type))
7202 cum->sse_words += words;
7203 cum->sse_nregs -= 1;
7204 cum->sse_regno += 1;
7205 if (cum->sse_nregs <= 0)
7207 cum->sse_nregs = 0;
7208 cum->sse_regno = 0;
7211 break;
7213 case V8QImode:
7214 case V4HImode:
7215 case V2SImode:
7216 case V2SFmode:
7217 case V1TImode:
7218 case V1DImode:
7219 if (!type || !AGGREGATE_TYPE_P (type))
7221 cum->mmx_words += words;
7222 cum->mmx_nregs -= 1;
7223 cum->mmx_regno += 1;
7224 if (cum->mmx_nregs <= 0)
7226 cum->mmx_nregs = 0;
7227 cum->mmx_regno = 0;
7230 break;
7234 static void
7235 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7236 const_tree type, HOST_WIDE_INT words, bool named)
7238 int int_nregs, sse_nregs;
7240 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7241 if (!named && (VALID_AVX512F_REG_MODE (mode)
7242 || VALID_AVX256_REG_MODE (mode)))
7243 return;
7245 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7246 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7248 cum->nregs -= int_nregs;
7249 cum->sse_nregs -= sse_nregs;
7250 cum->regno += int_nregs;
7251 cum->sse_regno += sse_nregs;
7253 else
7255 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7256 cum->words = (cum->words + align - 1) & ~(align - 1);
7257 cum->words += words;
7261 static void
7262 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7263 HOST_WIDE_INT words)
7265 /* Otherwise, this should be passed indirect. */
7266 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7268 cum->words += words;
7269 if (cum->nregs > 0)
7271 cum->nregs -= 1;
7272 cum->regno += 1;
7276 /* Update the data in CUM to advance over an argument of mode MODE and
7277 data type TYPE. (TYPE is null for libcalls where that information
7278 may not be available.) */
7280 static void
7281 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7282 const_tree type, bool named)
7284 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7285 HOST_WIDE_INT bytes, words;
7287 if (mode == BLKmode)
7288 bytes = int_size_in_bytes (type);
7289 else
7290 bytes = GET_MODE_SIZE (mode);
7291 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7293 if (type)
7294 mode = type_natural_mode (type, NULL, false);
7296 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7297 function_arg_advance_ms_64 (cum, bytes, words);
7298 else if (TARGET_64BIT)
7299 function_arg_advance_64 (cum, mode, type, words, named);
7300 else
7301 function_arg_advance_32 (cum, mode, type, bytes, words);
7304 /* Define where to put the arguments to a function.
7305 Value is zero to push the argument on the stack,
7306 or a hard register in which to store the argument.
7308 MODE is the argument's machine mode.
7309 TYPE is the data type of the argument (as a tree).
7310 This is null for libcalls where that information may
7311 not be available.
7312 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7313 the preceding args and about the function being called.
7314 NAMED is nonzero if this argument is a named parameter
7315 (otherwise it is an extra parameter matching an ellipsis). */
7317 static rtx
7318 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7319 enum machine_mode orig_mode, const_tree type,
7320 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7322 /* Avoid the AL settings for the Unix64 ABI. */
7323 if (mode == VOIDmode)
7324 return constm1_rtx;
7326 switch (mode)
7328 default:
7329 break;
7331 case BLKmode:
7332 if (bytes < 0)
7333 break;
7334 /* FALLTHRU */
7335 case DImode:
7336 case SImode:
7337 case HImode:
7338 case QImode:
7339 if (words <= cum->nregs)
7341 int regno = cum->regno;
7343 /* Fastcall allocates the first two DWORD (SImode) or
7344 smaller arguments to ECX and EDX, provided the argument
7345 is not an aggregate type. */
7346 if (cum->fastcall)
7348 if (mode == BLKmode
7349 || mode == DImode
7350 || (type && AGGREGATE_TYPE_P (type)))
7351 break;
7353 /* ECX not EAX is the first allocated register. */
7354 if (regno == AX_REG)
7355 regno = CX_REG;
7357 return gen_rtx_REG (mode, regno);
7359 break;
7361 case DFmode:
7362 if (cum->float_in_sse < 2)
7363 break;
7364 case SFmode:
7365 if (cum->float_in_sse < 1)
7366 break;
7367 /* FALLTHRU */
7368 case TImode:
7369 /* In 32bit, we pass TImode in xmm registers. */
7370 case V16QImode:
7371 case V8HImode:
7372 case V4SImode:
7373 case V2DImode:
7374 case V4SFmode:
7375 case V2DFmode:
7376 if (!type || !AGGREGATE_TYPE_P (type))
7378 if (cum->sse_nregs)
7379 return gen_reg_or_parallel (mode, orig_mode,
7380 cum->sse_regno + FIRST_SSE_REG);
7382 break;
7384 case OImode:
7385 case XImode:
7386 /* OImode and XImode shouldn't be used directly. */
7387 gcc_unreachable ();
7389 case V64QImode:
7390 case V32HImode:
7391 case V16SImode:
7392 case V8DImode:
7393 case V16SFmode:
7394 case V8DFmode:
7395 case V8SFmode:
7396 case V8SImode:
7397 case V32QImode:
7398 case V16HImode:
7399 case V4DFmode:
7400 case V4DImode:
7401 if (!type || !AGGREGATE_TYPE_P (type))
7403 if (cum->sse_nregs)
7404 return gen_reg_or_parallel (mode, orig_mode,
7405 cum->sse_regno + FIRST_SSE_REG);
7407 break;
7409 case V8QImode:
7410 case V4HImode:
7411 case V2SImode:
7412 case V2SFmode:
7413 case V1TImode:
7414 case V1DImode:
7415 if (!type || !AGGREGATE_TYPE_P (type))
7417 if (cum->mmx_nregs)
7418 return gen_reg_or_parallel (mode, orig_mode,
7419 cum->mmx_regno + FIRST_MMX_REG);
7421 break;
7424 return NULL_RTX;
7427 static rtx
7428 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7429 enum machine_mode orig_mode, const_tree type, bool named)
7431 /* Handle a hidden AL argument containing the number of SSE registers
7432 used by varargs x86-64 functions. */
7433 if (mode == VOIDmode)
7434 return GEN_INT (cum->maybe_vaarg
7435 ? (cum->sse_nregs < 0
7436 ? X86_64_SSE_REGPARM_MAX
7437 : cum->sse_regno)
7438 : -1);
7440 switch (mode)
7442 default:
7443 break;
7445 case V8SFmode:
7446 case V8SImode:
7447 case V32QImode:
7448 case V16HImode:
7449 case V4DFmode:
7450 case V4DImode:
7451 case V16SFmode:
7452 case V16SImode:
7453 case V64QImode:
7454 case V32HImode:
7455 case V8DFmode:
7456 case V8DImode:
7457 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7458 if (!named)
7459 return NULL;
7460 break;
7463 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7464 cum->sse_nregs,
7465 &x86_64_int_parameter_registers [cum->regno],
7466 cum->sse_regno);
7469 static rtx
7470 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7471 enum machine_mode orig_mode, bool named,
7472 HOST_WIDE_INT bytes)
7474 unsigned int regno;
7476 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7477 We use a value of -2 to specify that the current function call is MS ABI. */
7478 if (mode == VOIDmode)
7479 return GEN_INT (-2);
7481 /* If we've run out of registers, it goes on the stack. */
7482 if (cum->nregs == 0)
7483 return NULL_RTX;
7485 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7487 /* Only floating point modes are passed in anything but integer regs. */
7488 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7490 if (named)
7491 regno = cum->regno + FIRST_SSE_REG;
7492 else
7494 rtx t1, t2;
7496 /* Unnamed floating parameters are passed in both the
7497 SSE and integer registers. */
7498 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7499 t2 = gen_rtx_REG (mode, regno);
7500 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7501 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7502 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7505 /* Handle aggregate types passed in registers. */
7506 if (orig_mode == BLKmode)
7508 if (bytes > 0 && bytes <= 8)
7509 mode = (bytes > 4 ? DImode : SImode);
7510 if (mode == BLKmode)
7511 mode = DImode;
7514 return gen_reg_or_parallel (mode, orig_mode, regno);
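/* For example, for f (int, double, int, double) the Microsoft x64
   convention handled above uses the fixed slots %ecx, %xmm1, %r8d and
   %xmm3; an unnamed double is additionally mirrored into the matching
   integer register so that va_arg can retrieve it.  */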
7517 /* Return where to put the arguments to a function.
7518 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7520 MODE is the argument's machine mode. TYPE is the data type of the
7521 argument. It is null for libcalls where that information may not be
7522 available. CUM gives information about the preceding args and about
7523 the function being called. NAMED is nonzero if this argument is a
7524 named parameter (otherwise it is an extra parameter matching an
7525 ellipsis). */
7527 static rtx
7528 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7529 const_tree type, bool named)
7531 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7532 enum machine_mode mode = omode;
7533 HOST_WIDE_INT bytes, words;
7534 rtx arg;
7536 if (mode == BLKmode)
7537 bytes = int_size_in_bytes (type);
7538 else
7539 bytes = GET_MODE_SIZE (mode);
7540 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7542 /* To simplify the code below, represent vector types with a vector mode
7543 even if MMX/SSE are not active. */
7544 if (type && TREE_CODE (type) == VECTOR_TYPE)
7545 mode = type_natural_mode (type, cum, false);
7547 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7548 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7549 else if (TARGET_64BIT)
7550 arg = function_arg_64 (cum, mode, omode, type, named);
7551 else
7552 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7554 return arg;
7557 /* A C expression that indicates when an argument must be passed by
7558 reference. If nonzero for an argument, a copy of that argument is
7559 made in memory and a pointer to the argument is passed instead of
7560 the argument itself. The pointer is passed in whatever way is
7561 appropriate for passing a pointer to that type. */
7563 static bool
7564 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7565 const_tree type, bool named ATTRIBUTE_UNUSED)
7567 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7569 /* See Windows x64 Software Convention. */
7570 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7572 int msize = (int) GET_MODE_SIZE (mode);
7573 if (type)
7575 /* Arrays are passed by reference. */
7576 if (TREE_CODE (type) == ARRAY_TYPE)
7577 return true;
7579 if (AGGREGATE_TYPE_P (type))
7581 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7582 are passed by reference. */
7583 msize = int_size_in_bytes (type);
7587 /* __m128 is passed by reference. */
7588 switch (msize) {
7589 case 1: case 2: case 4: case 8:
7590 break;
7591 default:
7592 return true;
7595 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7596 return 1;
7598 return 0;
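/* For example, under the MS ABI a 12-byte struct (or any aggregate
   whose size is not 1, 2, 4 or 8 bytes, any array, or an __m128 value)
   is passed by reference, while an 8-byte struct is passed by value.
   On the SysV side only variably-sized types are forced by reference
   here; large fixed-size aggregates simply end up on the stack through
   classify_argument instead.  */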
7601 /* Return true when TYPE should be 128bit aligned for 32bit argument
7602 passing ABI. XXX: This function is obsolete and is only used for
7603 checking psABI compatibility with previous versions of GCC. */
7605 static bool
7606 ix86_compat_aligned_value_p (const_tree type)
7608 enum machine_mode mode = TYPE_MODE (type);
7609 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7610 || mode == TDmode
7611 || mode == TFmode
7612 || mode == TCmode)
7613 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7614 return true;
7615 if (TYPE_ALIGN (type) < 128)
7616 return false;
7618 if (AGGREGATE_TYPE_P (type))
7620 /* Walk the aggregates recursively. */
7621 switch (TREE_CODE (type))
7623 case RECORD_TYPE:
7624 case UNION_TYPE:
7625 case QUAL_UNION_TYPE:
7627 tree field;
7629 /* Walk all the structure fields. */
7630 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7632 if (TREE_CODE (field) == FIELD_DECL
7633 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7634 return true;
7636 break;
7639 case ARRAY_TYPE:
7640 /* Just for use if some language passes arrays by value. */
7641 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7642 return true;
7643 break;
7645 default:
7646 gcc_unreachable ();
7649 return false;
7652 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7653 XXX: This function is obsolete and is only used for checking psABI
7654 compatibility with previous versions of GCC. */
7656 static unsigned int
7657 ix86_compat_function_arg_boundary (enum machine_mode mode,
7658 const_tree type, unsigned int align)
7660 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7661 natural boundaries. */
7662 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7664 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7665 make an exception for SSE modes since these require 128bit
7666 alignment.
7668 The handling here differs from field_alignment. ICC aligns MMX
7669 arguments to 4 byte boundaries, while structure fields are aligned
7670 to 8 byte boundaries. */
7671 if (!type)
7673 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7674 align = PARM_BOUNDARY;
7676 else
7678 if (!ix86_compat_aligned_value_p (type))
7679 align = PARM_BOUNDARY;
7682 if (align > BIGGEST_ALIGNMENT)
7683 align = BIGGEST_ALIGNMENT;
7684 return align;
7687 /* Return true when TYPE should be 128bit aligned for 32bit argument
7688 passing ABI. */
7690 static bool
7691 ix86_contains_aligned_value_p (const_tree type)
7693 enum machine_mode mode = TYPE_MODE (type);
7695 if (mode == XFmode || mode == XCmode)
7696 return false;
7698 if (TYPE_ALIGN (type) < 128)
7699 return false;
7701 if (AGGREGATE_TYPE_P (type))
7703 /* Walk the aggregates recursively. */
7704 switch (TREE_CODE (type))
7706 case RECORD_TYPE:
7707 case UNION_TYPE:
7708 case QUAL_UNION_TYPE:
7710 tree field;
7712 /* Walk all the structure fields. */
7713 for (field = TYPE_FIELDS (type);
7714 field;
7715 field = DECL_CHAIN (field))
7717 if (TREE_CODE (field) == FIELD_DECL
7718 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7719 return true;
7721 break;
7724 case ARRAY_TYPE:
7725 /* Just for use if some language passes arrays by value. */
7726 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7727 return true;
7728 break;
7730 default:
7731 gcc_unreachable ();
7734 else
7735 return TYPE_ALIGN (type) >= 128;
7737 return false;
7740 /* Gives the alignment boundary, in bits, of an argument with the
7741 specified mode and type. */
7743 static unsigned int
7744 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7746 unsigned int align;
7747 if (type)
7749 /* Since the main variant type is used for the call, convert
7750 the type to its main variant. */
7751 type = TYPE_MAIN_VARIANT (type);
7752 align = TYPE_ALIGN (type);
7754 else
7755 align = GET_MODE_ALIGNMENT (mode);
7756 if (align < PARM_BOUNDARY)
7757 align = PARM_BOUNDARY;
7758 else
7760 static bool warned;
7761 unsigned int saved_align = align;
7763 if (!TARGET_64BIT)
7765 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7766 if (!type)
7768 if (mode == XFmode || mode == XCmode)
7769 align = PARM_BOUNDARY;
7771 else if (!ix86_contains_aligned_value_p (type))
7772 align = PARM_BOUNDARY;
7774 if (align < 128)
7775 align = PARM_BOUNDARY;
7778 if (warn_psabi
7779 && !warned
7780 && align != ix86_compat_function_arg_boundary (mode, type,
7781 saved_align))
7783 warned = true;
7784 inform (input_location,
7785 "The ABI for passing parameters with %d-byte"
7786 " alignment has changed in GCC 4.6",
7787 align / BITS_PER_UNIT);
7791 return align;
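/* For example, with the rules above a plain double argument on ia32
   gets only PARM_BOUNDARY (4-byte) stack alignment, while an __m128
   argument keeps its 16-byte alignment; on x86-64 every argument is
   aligned to at least 8 bytes and vectors keep their natural
   alignment.  */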
7794 /* Return true if N is a possible register number of function value. */
7796 static bool
7797 ix86_function_value_regno_p (const unsigned int regno)
7799 switch (regno)
7801 case AX_REG:
7802 return true;
7803 case DX_REG:
7804 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7805 case DI_REG:
7806 case SI_REG:
7807 return TARGET_64BIT && ix86_abi != MS_ABI;
7809 /* Complex values are returned in %st(0)/%st(1) pair. */
7810 case ST0_REG:
7811 case ST1_REG:
7812 /* TODO: The function should depend on current function ABI but
7813 builtins.c would need updating then. Therefore we use the
7814 default ABI. */
7815 if (TARGET_64BIT && ix86_abi == MS_ABI)
7816 return false;
7817 return TARGET_FLOAT_RETURNS_IN_80387;
7819 /* Complex values are returned in %xmm0/%xmm1 pair. */
7820 case XMM0_REG:
7821 case XMM1_REG:
7822 return TARGET_SSE;
7824 case MM0_REG:
7825 if (TARGET_MACHO || TARGET_64BIT)
7826 return false;
7827 return TARGET_MMX;
7830 return false;
7833 /* Define how to find the value returned by a function.
7834 VALTYPE is the data type of the value (as a tree).
7835 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7836 otherwise, FUNC is 0. */
7838 static rtx
7839 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7840 const_tree fntype, const_tree fn)
7842 unsigned int regno;
7844 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7845 we normally prevent this case when mmx is not available. However
7846 some ABIs may require the result to be returned like DImode. */
7847 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7848 regno = FIRST_MMX_REG;
7850 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7851 we prevent this case when sse is not available. However some ABIs
7852 may require the result to be returned like integer TImode. */
7853 else if (mode == TImode
7854 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7855 regno = FIRST_SSE_REG;
7857 /* 32-byte vector modes in %ymm0. */
7858 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7859 regno = FIRST_SSE_REG;
7861 /* 64-byte vector modes in %zmm0. */
7862 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7863 regno = FIRST_SSE_REG;
7865 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7866 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7867 regno = FIRST_FLOAT_REG;
7868 else
7869 /* Most things go in %eax. */
7870 regno = AX_REG;
7872 /* Override FP return register with %xmm0 for local functions when
7873 SSE math is enabled or for functions with sseregparm attribute. */
7874 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7876 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7877 if ((sse_level >= 1 && mode == SFmode)
7878 || (sse_level == 2 && mode == DFmode))
7879 regno = FIRST_SSE_REG;
7882 /* OImode shouldn't be used directly. */
7883 gcc_assert (mode != OImode);
7885 return gen_rtx_REG (orig_mode, regno);
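/* For example, in the 32-bit case handled above an int comes back in
   %eax, a float or double in %st(0) by default, an 8-byte vector in
   %mm0 (when such returns are allowed at all) and an __m128 in %xmm0;
   scalar float/double results are redirected to %xmm0 for functions
   using SSE math or carrying the sseregparm attribute.  */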
7888 static rtx
7889 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7890 const_tree valtype)
7892 rtx ret;
7894 /* Handle libcalls, which don't provide a type node. */
7895 if (valtype == NULL)
7897 unsigned int regno;
7899 switch (mode)
7901 case SFmode:
7902 case SCmode:
7903 case DFmode:
7904 case DCmode:
7905 case TFmode:
7906 case SDmode:
7907 case DDmode:
7908 case TDmode:
7909 regno = FIRST_SSE_REG;
7910 break;
7911 case XFmode:
7912 case XCmode:
7913 regno = FIRST_FLOAT_REG;
7914 break;
7915 case TCmode:
7916 return NULL;
7917 default:
7918 regno = AX_REG;
7921 return gen_rtx_REG (mode, regno);
7923 else if (POINTER_TYPE_P (valtype))
7925 /* Pointers are always returned in word_mode. */
7926 mode = word_mode;
7929 ret = construct_container (mode, orig_mode, valtype, 1,
7930 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7931 x86_64_int_return_registers, 0);
7933 /* For zero sized structures, construct_container returns NULL, but we
7934 need to keep the rest of the compiler happy by returning a meaningful value. */
7935 if (!ret)
7936 ret = gen_rtx_REG (orig_mode, AX_REG);
7938 return ret;
7941 static rtx
7942 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7943 const_tree valtype)
7945 unsigned int regno = AX_REG;
7947 if (TARGET_SSE)
7949 switch (GET_MODE_SIZE (mode))
7951 case 16:
7952 if (valtype != NULL_TREE
7953 && !VECTOR_INTEGER_TYPE_P (valtype)
7955 && !INTEGRAL_TYPE_P (valtype)
7956 && !VECTOR_FLOAT_TYPE_P (valtype))
7957 break;
7958 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7959 && !COMPLEX_MODE_P (mode))
7960 regno = FIRST_SSE_REG;
7961 break;
7962 case 8:
7963 case 4:
7964 if (mode == SFmode || mode == DFmode)
7965 regno = FIRST_SSE_REG;
7966 break;
7967 default:
7968 break;
7971 return gen_rtx_REG (orig_mode, regno);
7974 static rtx
7975 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7976 enum machine_mode orig_mode, enum machine_mode mode)
7978 const_tree fn, fntype;
7980 fn = NULL_TREE;
7981 if (fntype_or_decl && DECL_P (fntype_or_decl))
7982 fn = fntype_or_decl;
7983 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7985 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7986 return function_value_ms_64 (orig_mode, mode, valtype);
7987 else if (TARGET_64BIT)
7988 return function_value_64 (orig_mode, mode, valtype);
7989 else
7990 return function_value_32 (orig_mode, mode, fntype, fn);
7993 static rtx
7994 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7995 bool outgoing ATTRIBUTE_UNUSED)
7997 enum machine_mode mode, orig_mode;
7999 orig_mode = TYPE_MODE (valtype);
8000 mode = type_natural_mode (valtype, NULL, true);
8001 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8004 /* Pointer function arguments and return values are promoted to
8005 word_mode. */
8007 static enum machine_mode
8008 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8009 int *punsignedp, const_tree fntype,
8010 int for_return)
8012 if (type != NULL_TREE && POINTER_TYPE_P (type))
8014 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8015 return word_mode;
8017 return default_promote_function_mode (type, mode, punsignedp, fntype,
8018 for_return);
8021 /* Return true if a structure, union or array with MODE containing FIELD
8022 should be accessed using BLKmode. */
8024 static bool
8025 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8027 /* Union with XFmode must be in BLKmode. */
8028 return (mode == XFmode
8029 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8030 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8033 rtx
8034 ix86_libcall_value (enum machine_mode mode)
8036 return ix86_function_value_1 (NULL, NULL, mode, mode);
8039 /* Return true iff type is returned in memory. */
8041 static bool
8042 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8044 #ifdef SUBTARGET_RETURN_IN_MEMORY
8045 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8046 #else
8047 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8048 HOST_WIDE_INT size;
8050 if (TARGET_64BIT)
8052 if (ix86_function_type_abi (fntype) == MS_ABI)
8054 size = int_size_in_bytes (type);
8056 /* __m128 is returned in xmm0. */
8057 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8058 || INTEGRAL_TYPE_P (type)
8059 || VECTOR_FLOAT_TYPE_P (type))
8060 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8061 && !COMPLEX_MODE_P (mode)
8062 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8063 return false;
8065 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8066 return size != 1 && size != 2 && size != 4 && size != 8;
8068 else
8070 int needed_intregs, needed_sseregs;
8072 return examine_argument (mode, type, 1,
8073 &needed_intregs, &needed_sseregs);
8076 else
8078 if (mode == BLKmode)
8079 return true;
8081 size = int_size_in_bytes (type);
8083 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8084 return false;
8086 if (VECTOR_MODE_P (mode) || mode == TImode)
8088 /* User-created vectors small enough to fit in EAX. */
8089 if (size < 8)
8090 return false;
8092 /* Unless the ABI prescribes otherwise,
8093 MMX/3dNow values are returned in MM0 if available. */
8095 if (size == 8)
8096 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8098 /* SSE values are returned in XMM0 if available. */
8099 if (size == 16)
8100 return !TARGET_SSE;
8102 /* AVX values are returned in YMM0 if available. */
8103 if (size == 32)
8104 return !TARGET_AVX;
8106 /* AVX512F values are returned in ZMM0 if available. */
8107 if (size == 64)
8108 return !TARGET_AVX512F;
8111 if (mode == XFmode)
8112 return false;
8114 if (size > 12)
8115 return true;
8117 /* OImode shouldn't be used directly. */
8118 gcc_assert (mode != OImode);
8120 return false;
8122 #endif
8126 /* Create the va_list data type. */
8128 /* Returns the calling-convention-specific va_list data type.
8129 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8131 static tree
8132 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8134 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8136 /* For i386 we use a plain pointer to the argument area. */
8137 if (!TARGET_64BIT || abi == MS_ABI)
8138 return build_pointer_type (char_type_node);
8140 record = lang_hooks.types.make_type (RECORD_TYPE);
8141 type_decl = build_decl (BUILTINS_LOCATION,
8142 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8144 f_gpr = build_decl (BUILTINS_LOCATION,
8145 FIELD_DECL, get_identifier ("gp_offset"),
8146 unsigned_type_node);
8147 f_fpr = build_decl (BUILTINS_LOCATION,
8148 FIELD_DECL, get_identifier ("fp_offset"),
8149 unsigned_type_node);
8150 f_ovf = build_decl (BUILTINS_LOCATION,
8151 FIELD_DECL, get_identifier ("overflow_arg_area"),
8152 ptr_type_node);
8153 f_sav = build_decl (BUILTINS_LOCATION,
8154 FIELD_DECL, get_identifier ("reg_save_area"),
8155 ptr_type_node);
8157 va_list_gpr_counter_field = f_gpr;
8158 va_list_fpr_counter_field = f_fpr;
8160 DECL_FIELD_CONTEXT (f_gpr) = record;
8161 DECL_FIELD_CONTEXT (f_fpr) = record;
8162 DECL_FIELD_CONTEXT (f_ovf) = record;
8163 DECL_FIELD_CONTEXT (f_sav) = record;
8165 TYPE_STUB_DECL (record) = type_decl;
8166 TYPE_NAME (record) = type_decl;
8167 TYPE_FIELDS (record) = f_gpr;
8168 DECL_CHAIN (f_gpr) = f_fpr;
8169 DECL_CHAIN (f_fpr) = f_ovf;
8170 DECL_CHAIN (f_ovf) = f_sav;
8172 layout_type (record);
8174 /* The correct type is an array type of one element. */
8175 return build_array_type (record, build_index_type (size_zero_node));
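/* The record built above corresponds to the familiar SysV x86-64
   va_list layout, roughly

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];

   i.e. a one-element array of the tag type, which is what the
   build_array_type call returns.  */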
8178 /* Setup the builtin va_list data type and for 64-bit the additional
8179 calling convention specific va_list data types. */
8181 static tree
8182 ix86_build_builtin_va_list (void)
8184 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8186 /* Initialize abi specific va_list builtin types. */
8187 if (TARGET_64BIT)
8189 tree t;
8190 if (ix86_abi == MS_ABI)
8192 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8193 if (TREE_CODE (t) != RECORD_TYPE)
8194 t = build_variant_type_copy (t);
8195 sysv_va_list_type_node = t;
8197 else
8199 t = ret;
8200 if (TREE_CODE (t) != RECORD_TYPE)
8201 t = build_variant_type_copy (t);
8202 sysv_va_list_type_node = t;
8204 if (ix86_abi != MS_ABI)
8206 t = ix86_build_builtin_va_list_abi (MS_ABI);
8207 if (TREE_CODE (t) != RECORD_TYPE)
8208 t = build_variant_type_copy (t);
8209 ms_va_list_type_node = t;
8211 else
8213 t = ret;
8214 if (TREE_CODE (t) != RECORD_TYPE)
8215 t = build_variant_type_copy (t);
8216 ms_va_list_type_node = t;
8220 return ret;
8223 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8225 static void
8226 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8228 rtx save_area, mem;
8229 alias_set_type set;
8230 int i, max;
8232 /* GPR size of varargs save area. */
8233 if (cfun->va_list_gpr_size)
8234 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8235 else
8236 ix86_varargs_gpr_size = 0;
8238 /* FPR size of varargs save area. We don't need it if we don't pass
8239 anything in SSE registers. */
8240 if (TARGET_SSE && cfun->va_list_fpr_size)
8241 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8242 else
8243 ix86_varargs_fpr_size = 0;
8245 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8246 return;
8248 save_area = frame_pointer_rtx;
8249 set = get_varargs_alias_set ();
8251 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8252 if (max > X86_64_REGPARM_MAX)
8253 max = X86_64_REGPARM_MAX;
8255 for (i = cum->regno; i < max; i++)
8257 mem = gen_rtx_MEM (word_mode,
8258 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8259 MEM_NOTRAP_P (mem) = 1;
8260 set_mem_alias_set (mem, set);
8261 emit_move_insn (mem,
8262 gen_rtx_REG (word_mode,
8263 x86_64_int_parameter_registers[i]));
8266 if (ix86_varargs_fpr_size)
8268 enum machine_mode smode;
8269 rtx label, test;
8271 /* Now emit code to save SSE registers. The AX parameter contains the number
8272 of SSE parameter registers used to call this function, though all we
8273 actually check here is the zero/non-zero status. */
8275 label = gen_label_rtx ();
8276 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8277 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8278 label));
8280 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8281 we used movdqa (i.e. TImode) instead? Perhaps even better would
8282 be if we could determine the real mode of the data, via a hook
8283 into pass_stdarg. Ignore all that for now. */
8284 smode = V4SFmode;
8285 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8286 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8288 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8289 if (max > X86_64_SSE_REGPARM_MAX)
8290 max = X86_64_SSE_REGPARM_MAX;
8292 for (i = cum->sse_regno; i < max; ++i)
8294 mem = plus_constant (Pmode, save_area,
8295 i * 16 + ix86_varargs_gpr_size);
8296 mem = gen_rtx_MEM (smode, mem);
8297 MEM_NOTRAP_P (mem) = 1;
8298 set_mem_alias_set (mem, set);
8299 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8301 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8304 emit_label (label);
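/* For example, the save area laid out above holds the six
   general-purpose argument registers in its first 6 * 8 = 48 bytes,
   followed by up to eight 16-byte XMM slots; the SSE spill loop is
   skipped entirely when %al (the hidden count of SSE registers used by
   the caller) is zero.  */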
8308 static void
8309 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8311 alias_set_type set = get_varargs_alias_set ();
8312 int i;
8314 /* Reset to zero, as a SysV va_arg may have been
8315 used before. */
8316 ix86_varargs_gpr_size = 0;
8317 ix86_varargs_fpr_size = 0;
8319 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8321 rtx reg, mem;
8323 mem = gen_rtx_MEM (Pmode,
8324 plus_constant (Pmode, virtual_incoming_args_rtx,
8325 i * UNITS_PER_WORD));
8326 MEM_NOTRAP_P (mem) = 1;
8327 set_mem_alias_set (mem, set);
8329 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8330 emit_move_insn (mem, reg);
8334 static void
8335 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8336 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8337 int no_rtl)
8339 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8340 CUMULATIVE_ARGS next_cum;
8341 tree fntype;
8343 /* This argument doesn't appear to be used anymore, which is good,
8344 because the old code here didn't suppress rtl generation. */
8345 gcc_assert (!no_rtl);
8347 if (!TARGET_64BIT)
8348 return;
8350 fntype = TREE_TYPE (current_function_decl);
8352 /* For varargs, we do not want to skip the dummy va_dcl argument.
8353 For stdargs, we do want to skip the last named argument. */
8354 next_cum = *cum;
8355 if (stdarg_p (fntype))
8356 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8357 true);
8359 if (cum->call_abi == MS_ABI)
8360 setup_incoming_varargs_ms_64 (&next_cum);
8361 else
8362 setup_incoming_varargs_64 (&next_cum);
8365 /* Checks if TYPE is the char * kind of va_list. */
8367 static bool
8368 is_va_list_char_pointer (tree type)
8370 tree canonic;
8372 /* For 32-bit it is always true. */
8373 if (!TARGET_64BIT)
8374 return true;
8375 canonic = ix86_canonical_va_list_type (type);
8376 return (canonic == ms_va_list_type_node
8377 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8380 /* Implement va_start. */
8382 static void
8383 ix86_va_start (tree valist, rtx nextarg)
8385 HOST_WIDE_INT words, n_gpr, n_fpr;
8386 tree f_gpr, f_fpr, f_ovf, f_sav;
8387 tree gpr, fpr, ovf, sav, t;
8388 tree type;
8389 rtx ovf_rtx;
8391 if (flag_split_stack
8392 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8394 unsigned int scratch_regno;
8396 /* When we are splitting the stack, we can't refer to the stack
8397 arguments using internal_arg_pointer, because they may be on
8398 the old stack. The split stack prologue will arrange to
8399 leave a pointer to the old stack arguments in a scratch
8400 register, which we here copy to a pseudo-register. The split
8401 stack prologue can't set the pseudo-register directly because
8402 it (the prologue) runs before any registers have been saved. */
8404 scratch_regno = split_stack_prologue_scratch_regno ();
8405 if (scratch_regno != INVALID_REGNUM)
8407 rtx reg, seq;
8409 reg = gen_reg_rtx (Pmode);
8410 cfun->machine->split_stack_varargs_pointer = reg;
8412 start_sequence ();
8413 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8414 seq = get_insns ();
8415 end_sequence ();
8417 push_topmost_sequence ();
8418 emit_insn_after (seq, entry_of_function ());
8419 pop_topmost_sequence ();
8423 /* Only 64bit target needs something special. */
8424 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8426 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8427 std_expand_builtin_va_start (valist, nextarg);
8428 else
8430 rtx va_r, next;
8432 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8433 next = expand_binop (ptr_mode, add_optab,
8434 cfun->machine->split_stack_varargs_pointer,
8435 crtl->args.arg_offset_rtx,
8436 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8437 convert_move (va_r, next, 0);
8439 return;
8442 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8443 f_fpr = DECL_CHAIN (f_gpr);
8444 f_ovf = DECL_CHAIN (f_fpr);
8445 f_sav = DECL_CHAIN (f_ovf);
8447 valist = build_simple_mem_ref (valist);
8448 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8449 /* The following should be folded into the MEM_REF offset. */
8450 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8451 f_gpr, NULL_TREE);
8452 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8453 f_fpr, NULL_TREE);
8454 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8455 f_ovf, NULL_TREE);
8456 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8457 f_sav, NULL_TREE);
8459 /* Count number of gp and fp argument registers used. */
8460 words = crtl->args.info.words;
8461 n_gpr = crtl->args.info.regno;
8462 n_fpr = crtl->args.info.sse_regno;
8464 if (cfun->va_list_gpr_size)
8466 type = TREE_TYPE (gpr);
8467 t = build2 (MODIFY_EXPR, type,
8468 gpr, build_int_cst (type, n_gpr * 8));
8469 TREE_SIDE_EFFECTS (t) = 1;
8470 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8473 if (TARGET_SSE && cfun->va_list_fpr_size)
8475 type = TREE_TYPE (fpr);
8476 t = build2 (MODIFY_EXPR, type, fpr,
8477 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8478 TREE_SIDE_EFFECTS (t) = 1;
8479 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8482 /* Find the overflow area. */
8483 type = TREE_TYPE (ovf);
8484 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8485 ovf_rtx = crtl->args.internal_arg_pointer;
8486 else
8487 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8488 t = make_tree (type, ovf_rtx);
8489 if (words != 0)
8490 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8491 t = build2 (MODIFY_EXPR, type, ovf, t);
8492 TREE_SIDE_EFFECTS (t) = 1;
8493 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8495 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8497 /* Find the register save area.
8498 The function prologue saves it right above the stack frame. */
8499 type = TREE_TYPE (sav);
8500 t = make_tree (type, frame_pointer_rtx);
8501 if (!ix86_varargs_gpr_size)
8502 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8503 t = build2 (MODIFY_EXPR, type, sav, t);
8504 TREE_SIDE_EFFECTS (t) = 1;
8505 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
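/* For example, after the expansion above gp_offset equals n_gpr * 8,
   fp_offset equals 48 + n_fpr * 16 (48 bytes being the GPR part of the
   save area), overflow_arg_area points just past the named stack
   arguments and reg_save_area points at the save area set up by the
   prologue.  */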
8509 /* Implement va_arg. */
8511 static tree
8512 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8513 gimple_seq *post_p)
8515 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8516 tree f_gpr, f_fpr, f_ovf, f_sav;
8517 tree gpr, fpr, ovf, sav, t;
8518 int size, rsize;
8519 tree lab_false, lab_over = NULL_TREE;
8520 tree addr, t2;
8521 rtx container;
8522 int indirect_p = 0;
8523 tree ptrtype;
8524 enum machine_mode nat_mode;
8525 unsigned int arg_boundary;
8527 /* Only 64bit target needs something special. */
8528 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8529 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8531 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8532 f_fpr = DECL_CHAIN (f_gpr);
8533 f_ovf = DECL_CHAIN (f_fpr);
8534 f_sav = DECL_CHAIN (f_ovf);
8536 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8537 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8538 valist = build_va_arg_indirect_ref (valist);
8539 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8540 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8541 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8543 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8544 if (indirect_p)
8545 type = build_pointer_type (type);
8546 size = int_size_in_bytes (type);
8547 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
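/* RSIZE is the argument size in words; the overflow area pointer is
   advanced by this many words when the value is taken from the stack. */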
8549 nat_mode = type_natural_mode (type, NULL, false);
8550 switch (nat_mode)
8552 case V8SFmode:
8553 case V8SImode:
8554 case V32QImode:
8555 case V16HImode:
8556 case V4DFmode:
8557 case V4DImode:
8558 case V16SFmode:
8559 case V16SImode:
8560 case V64QImode:
8561 case V32HImode:
8562 case V8DFmode:
8563 case V8DImode:
8564 /* Unnamed 256bit and 512bit vector mode parameters are passed on the stack. */
8565 if (!TARGET_64BIT_MS_ABI)
8567 container = NULL;
8568 break;
8571 default:
8572 container = construct_container (nat_mode, TYPE_MODE (type),
8573 type, 0, X86_64_REGPARM_MAX,
8574 X86_64_SSE_REGPARM_MAX, intreg,
8576 break;
8579 /* Pull the value out of the saved registers. */
8581 addr = create_tmp_var (ptr_type_node, "addr");
8583 if (container)
8585 int needed_intregs, needed_sseregs;
8586 bool need_temp;
8587 tree int_addr, sse_addr;
8589 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8590 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8592 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8594 need_temp = (!REG_P (container)
8595 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8596 || TYPE_ALIGN (type) > 128));
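/* A temporary is needed when the value does not live in a single register
   and the type is aligned more strictly than the register save area slots
   guarantee (64 bits for GP slots, 128 bits for SSE slots). */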
8598 /* In case we are passing a structure, verify that it is a consecutive block
8599 in the register save area. If not, we need to do moves. */
8600 if (!need_temp && !REG_P (container))
8602 /* Verify that all registers are strictly consecutive */
8603 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8605 int i;
8607 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8609 rtx slot = XVECEXP (container, 0, i);
8610 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8611 || INTVAL (XEXP (slot, 1)) != i * 16)
8612 need_temp = 1;
8615 else
8617 int i;
8619 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8621 rtx slot = XVECEXP (container, 0, i);
8622 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8623 || INTVAL (XEXP (slot, 1)) != i * 8)
8624 need_temp = 1;
8628 if (!need_temp)
8630 int_addr = addr;
8631 sse_addr = addr;
8633 else
8635 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8636 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8639 /* First ensure that we fit completely in registers. */
8640 if (needed_intregs)
8642 t = build_int_cst (TREE_TYPE (gpr),
8643 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8644 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8645 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8646 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8647 gimplify_and_add (t, pre_p);
8649 if (needed_sseregs)
8651 t = build_int_cst (TREE_TYPE (fpr),
8652 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8653 + X86_64_REGPARM_MAX * 8);
8654 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8655 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8656 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8657 gimplify_and_add (t, pre_p);
8660 /* Compute index to start of area used for integer regs. */
8661 if (needed_intregs)
8663 /* int_addr = gpr + sav; */
8664 t = fold_build_pointer_plus (sav, gpr);
8665 gimplify_assign (int_addr, t, pre_p);
8667 if (needed_sseregs)
8669 /* sse_addr = fpr + sav; */
8670 t = fold_build_pointer_plus (sav, fpr);
8671 gimplify_assign (sse_addr, t, pre_p);
8673 if (need_temp)
8675 int i, prev_size = 0;
8676 tree temp = create_tmp_var (type, "va_arg_tmp");
8678 /* addr = &temp; */
8679 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8680 gimplify_assign (addr, t, pre_p);
8682 for (i = 0; i < XVECLEN (container, 0); i++)
8684 rtx slot = XVECEXP (container, 0, i);
8685 rtx reg = XEXP (slot, 0);
8686 enum machine_mode mode = GET_MODE (reg);
8687 tree piece_type;
8688 tree addr_type;
8689 tree daddr_type;
8690 tree src_addr, src;
8691 int src_offset;
8692 tree dest_addr, dest;
8693 int cur_size = GET_MODE_SIZE (mode);
8695 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8696 prev_size = INTVAL (XEXP (slot, 1));
8697 if (prev_size + cur_size > size)
8699 cur_size = size - prev_size;
8700 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8701 if (mode == BLKmode)
8702 mode = QImode;
8704 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8705 if (mode == GET_MODE (reg))
8706 addr_type = build_pointer_type (piece_type);
8707 else
8708 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8709 true);
8710 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8711 true);
8713 if (SSE_REGNO_P (REGNO (reg)))
8715 src_addr = sse_addr;
8716 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8718 else
8720 src_addr = int_addr;
8721 src_offset = REGNO (reg) * 8;
8723 src_addr = fold_convert (addr_type, src_addr);
8724 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8726 dest_addr = fold_convert (daddr_type, addr);
8727 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8728 if (cur_size == GET_MODE_SIZE (mode))
8730 src = build_va_arg_indirect_ref (src_addr);
8731 dest = build_va_arg_indirect_ref (dest_addr);
8733 gimplify_assign (dest, src, pre_p);
8735 else
8737 tree copy
8738 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8739 3, dest_addr, src_addr,
8740 size_int (cur_size));
8741 gimplify_and_add (copy, pre_p);
8743 prev_size += cur_size;
8747 if (needed_intregs)
8749 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8750 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8751 gimplify_assign (gpr, t, pre_p);
8754 if (needed_sseregs)
8756 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8757 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8758 gimplify_assign (fpr, t, pre_p);
8761 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8763 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8766 /* ... otherwise out of the overflow area. */
8768 /* When the caller aligns a parameter on the stack, an alignment beyond
8769 MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
8770 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here with the
8771 caller. */
8772 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8773 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8774 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8776 /* Care for on-stack alignment if needed. */
8777 if (arg_boundary <= 64 || size == 0)
8778 t = ovf;
8779 else
8781 HOST_WIDE_INT align = arg_boundary / 8;
8782 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8783 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8784 build_int_cst (TREE_TYPE (t), -align));
8787 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8788 gimplify_assign (addr, t, pre_p);
8790 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8791 gimplify_assign (unshare_expr (ovf), t, pre_p);
8793 if (container)
8794 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8796 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8797 addr = fold_convert (ptrtype, addr);
8799 if (indirect_p)
8800 addr = build_va_arg_indirect_ref (addr);
8801 return build_va_arg_indirect_ref (addr);
8804 /* Return true if OPNUM's MEM should be matched
8805 in movabs* patterns. */
8807 bool
8808 ix86_check_movabs (rtx insn, int opnum)
8810 rtx set, mem;
8812 set = PATTERN (insn);
8813 if (GET_CODE (set) == PARALLEL)
8814 set = XVECEXP (set, 0, 0);
8815 gcc_assert (GET_CODE (set) == SET);
8816 mem = XEXP (set, opnum);
8817 while (GET_CODE (mem) == SUBREG)
8818 mem = SUBREG_REG (mem);
8819 gcc_assert (MEM_P (mem));
8820 return volatile_ok || !MEM_VOLATILE_P (mem);
8823 /* Initialize the table of extra 80387 mathematical constants. */
8825 static void
8826 init_ext_80387_constants (void)
8828 static const char * cst[5] =
8830 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8831 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8832 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8833 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8834 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8836 int i;
8838 for (i = 0; i < 5; i++)
8840 real_from_string (&ext_80387_constants_table[i], cst[i]);
8841 /* Ensure each constant is rounded to XFmode precision. */
8842 real_convert (&ext_80387_constants_table[i],
8843 XFmode, &ext_80387_constants_table[i]);
8846 ext_80387_constants_init = 1;
8849 /* Return non-zero if the constant is something that
8850 can be loaded with a special instruction. */
8853 standard_80387_constant_p (rtx x)
8855 enum machine_mode mode = GET_MODE (x);
8857 REAL_VALUE_TYPE r;
8859 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8860 return -1;
8862 if (x == CONST0_RTX (mode))
8863 return 1;
8864 if (x == CONST1_RTX (mode))
8865 return 2;
8867 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8869 /* For XFmode constants, try to find a special 80387 instruction when
8870 optimizing for size or on those CPUs that benefit from them. */
8871 if (mode == XFmode
8872 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8874 int i;
8876 if (! ext_80387_constants_init)
8877 init_ext_80387_constants ();
8879 for (i = 0; i < 5; i++)
8880 if (real_identical (&r, &ext_80387_constants_table[i]))
8881 return i + 3;
8884 /* Load of the constant -0.0 or -1.0 will be split as
8885 fldz;fchs or fld1;fchs sequence. */
8886 if (real_isnegzero (&r))
8887 return 8;
8888 if (real_identical (&r, &dconstm1))
8889 return 9;
8891 return 0;
8894 /* Return the opcode of the special instruction to be used to load
8895 the constant X. */
8897 const char *
8898 standard_80387_constant_opcode (rtx x)
8900 switch (standard_80387_constant_p (x))
8902 case 1:
8903 return "fldz";
8904 case 2:
8905 return "fld1";
8906 case 3:
8907 return "fldlg2";
8908 case 4:
8909 return "fldln2";
8910 case 5:
8911 return "fldl2e";
8912 case 6:
8913 return "fldl2t";
8914 case 7:
8915 return "fldpi";
8916 case 8:
8917 case 9:
8918 return "#";
8919 default:
8920 gcc_unreachable ();
8924 /* Return the CONST_DOUBLE representing the 80387 constant that is
8925 loaded by the specified special instruction. The argument IDX
8926 matches the return value from standard_80387_constant_p. */
8929 standard_80387_constant_rtx (int idx)
8931 int i;
8933 if (! ext_80387_constants_init)
8934 init_ext_80387_constants ();
8936 switch (idx)
8938 case 3:
8939 case 4:
8940 case 5:
8941 case 6:
8942 case 7:
8943 i = idx - 3;
8944 break;
8946 default:
8947 gcc_unreachable ();
8950 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8951 XFmode);
8954 /* Return 1 if X is all 0s and 2 if X is all 1s
8955 in a supported SSE/AVX vector mode. */
8958 standard_sse_constant_p (rtx x)
8960 enum machine_mode mode = GET_MODE (x);
8962 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8963 return 1;
8964 if (vector_all_ones_operand (x, mode))
8965 switch (mode)
8967 case V16QImode:
8968 case V8HImode:
8969 case V4SImode:
8970 case V2DImode:
8971 if (TARGET_SSE2)
8972 return 2;
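/* Deliberate fall-through when TARGET_SSE2 is not set. */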
8973 case V32QImode:
8974 case V16HImode:
8975 case V8SImode:
8976 case V4DImode:
8977 if (TARGET_AVX2)
8978 return 2;
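/* Deliberate fall-through when TARGET_AVX2 is not set. */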
8979 case V64QImode:
8980 case V32HImode:
8981 case V16SImode:
8982 case V8DImode:
8983 if (TARGET_AVX512F)
8984 return 2;
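/* Deliberate fall-through when TARGET_AVX512F is not set. */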
8985 default:
8986 break;
8989 return 0;
8992 /* Return the opcode of the special instruction to be used to load
8993 the constant X. */
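/* In the templates below, the %x, %t and %g operand modifiers print the
   operand as an XMM, YMM or ZMM register respectively, and %v prefixes the
   mnemonic with 'v' when AVX is enabled. */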
8995 const char *
8996 standard_sse_constant_opcode (rtx insn, rtx x)
8998 switch (standard_sse_constant_p (x))
9000 case 1:
9001 switch (get_attr_mode (insn))
9003 case MODE_XI:
9004 return "vpxord\t%g0, %g0, %g0";
9005 case MODE_V16SF:
9006 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9007 : "vpxord\t%g0, %g0, %g0";
9008 case MODE_V8DF:
9009 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9010 : "vpxorq\t%g0, %g0, %g0";
9011 case MODE_TI:
9012 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9013 : "%vpxor\t%0, %d0";
9014 case MODE_V2DF:
9015 return "%vxorpd\t%0, %d0";
9016 case MODE_V4SF:
9017 return "%vxorps\t%0, %d0";
9019 case MODE_OI:
9020 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9021 : "vpxor\t%x0, %x0, %x0";
9022 case MODE_V4DF:
9023 return "vxorpd\t%x0, %x0, %x0";
9024 case MODE_V8SF:
9025 return "vxorps\t%x0, %x0, %x0";
9027 default:
9028 break;
9031 case 2:
9032 if (TARGET_AVX512VL
9033 || get_attr_mode (insn) == MODE_XI
9034 || get_attr_mode (insn) == MODE_V8DF
9035 || get_attr_mode (insn) == MODE_V16SF)
9036 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9037 if (TARGET_AVX)
9038 return "vpcmpeqd\t%0, %0, %0";
9039 else
9040 return "pcmpeqd\t%0, %0";
9042 default:
9043 break;
9045 gcc_unreachable ();
9048 /* Return true if OP contains a symbol reference. */
9050 bool
9051 symbolic_reference_mentioned_p (rtx op)
9053 const char *fmt;
9054 int i;
9056 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9057 return true;
9059 fmt = GET_RTX_FORMAT (GET_CODE (op));
9060 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9062 if (fmt[i] == 'E')
9064 int j;
9066 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9067 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9068 return true;
9071 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9072 return true;
9075 return false;
9078 /* Return true if it is appropriate to emit `ret' instructions in the
9079 body of a function. Do this only if the epilogue is simple, needing a
9080 couple of insns. Prior to reloading, we can't tell how many registers
9081 must be saved, so return false then. Return false if there is no frame
9082 marker to de-allocate. */
9084 bool
9085 ix86_can_use_return_insn_p (void)
9087 struct ix86_frame frame;
9089 if (! reload_completed || frame_pointer_needed)
9090 return 0;
9092 /* Don't allow more than 32k pop, since that's all we can do
9093 with one instruction. */
9094 if (crtl->args.pops_args && crtl->args.size >= 32768)
9095 return 0;
9097 ix86_compute_frame_layout (&frame);
9098 return (frame.stack_pointer_offset == UNITS_PER_WORD
9099 && (frame.nregs + frame.nsseregs) == 0);
9102 /* Value should be nonzero if functions must have frame pointers.
9103 Zero means the frame pointer need not be set up (and parms may
9104 be accessed via the stack pointer) in functions that seem suitable. */
9106 static bool
9107 ix86_frame_pointer_required (void)
9109 /* If we accessed previous frames, then the generated code expects
9110 to be able to access the saved ebp value in our frame. */
9111 if (cfun->machine->accesses_prev_frame)
9112 return true;
9114 /* Several x86 OSes need a frame pointer for other reasons,
9115 usually pertaining to setjmp. */
9116 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9117 return true;
9119 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9120 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9121 return true;
9123 /* For Win64 SEH, very large frames need a frame pointer, as the maximum
9124 stack allocation is 4GB. */
9125 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9126 return true;
9128 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9129 turns off the frame pointer by default. Turn it back on now if
9130 we've not got a leaf function. */
9131 if (TARGET_OMIT_LEAF_FRAME_POINTER
9132 && (!crtl->is_leaf
9133 || ix86_current_function_calls_tls_descriptor))
9134 return true;
9136 if (crtl->profile && !flag_fentry)
9137 return true;
9139 return false;
9142 /* Record that the current function accesses previous call frames. */
9144 void
9145 ix86_setup_frame_addresses (void)
9147 cfun->machine->accesses_prev_frame = 1;
9150 #ifndef USE_HIDDEN_LINKONCE
9151 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9152 # define USE_HIDDEN_LINKONCE 1
9153 # else
9154 # define USE_HIDDEN_LINKONCE 0
9155 # endif
9156 #endif
9158 static int pic_labels_used;
9160 /* Fills in the label name that should be used for a pc thunk for
9161 the given register. */
9163 static void
9164 get_pc_thunk_name (char name[32], unsigned int regno)
9166 gcc_assert (!TARGET_64BIT);
9168 if (USE_HIDDEN_LINKONCE)
9169 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9170 else
9171 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9175 /* This function generates code for -fpic that loads %ebx with
9176 the return address of the caller and then returns. */
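/* Each thunk body is simply a load of the return address from the stack
   into the chosen register followed by a return, i.e. mov (%esp), %reg; ret,
   preceded by nop padding when TARGET_PAD_SHORT_FUNCTION. */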
9178 static void
9179 ix86_code_end (void)
9181 rtx xops[2];
9182 int regno;
9184 for (regno = AX_REG; regno <= SP_REG; regno++)
9186 char name[32];
9187 tree decl;
9189 if (!(pic_labels_used & (1 << regno)))
9190 continue;
9192 get_pc_thunk_name (name, regno);
9194 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9195 get_identifier (name),
9196 build_function_type_list (void_type_node, NULL_TREE));
9197 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9198 NULL_TREE, void_type_node);
9199 TREE_PUBLIC (decl) = 1;
9200 TREE_STATIC (decl) = 1;
9201 DECL_IGNORED_P (decl) = 1;
9203 #if TARGET_MACHO
9204 if (TARGET_MACHO)
9206 switch_to_section (darwin_sections[text_coal_section]);
9207 fputs ("\t.weak_definition\t", asm_out_file);
9208 assemble_name (asm_out_file, name);
9209 fputs ("\n\t.private_extern\t", asm_out_file);
9210 assemble_name (asm_out_file, name);
9211 putc ('\n', asm_out_file);
9212 ASM_OUTPUT_LABEL (asm_out_file, name);
9213 DECL_WEAK (decl) = 1;
9215 else
9216 #endif
9217 if (USE_HIDDEN_LINKONCE)
9219 cgraph_create_node (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9221 targetm.asm_out.unique_section (decl, 0);
9222 switch_to_section (get_named_section (decl, NULL, 0));
9224 targetm.asm_out.globalize_label (asm_out_file, name);
9225 fputs ("\t.hidden\t", asm_out_file);
9226 assemble_name (asm_out_file, name);
9227 putc ('\n', asm_out_file);
9228 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9230 else
9232 switch_to_section (text_section);
9233 ASM_OUTPUT_LABEL (asm_out_file, name);
9236 DECL_INITIAL (decl) = make_node (BLOCK);
9237 current_function_decl = decl;
9238 init_function_start (decl);
9239 first_function_block_is_cold = false;
9240 /* Make sure unwind info is emitted for the thunk if needed. */
9241 final_start_function (emit_barrier (), asm_out_file, 1);
9243 /* Pad stack IP move with 4 instructions (two NOPs count
9244 as one instruction). */
9245 if (TARGET_PAD_SHORT_FUNCTION)
9247 int i = 8;
9249 while (i--)
9250 fputs ("\tnop\n", asm_out_file);
9253 xops[0] = gen_rtx_REG (Pmode, regno);
9254 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9255 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9256 fputs ("\tret\n", asm_out_file);
9257 final_end_function ();
9258 init_insn_lengths ();
9259 free_after_compilation (cfun);
9260 set_cfun (NULL);
9261 current_function_decl = NULL;
9264 if (flag_split_stack)
9265 file_end_indicate_split_stack ();
9268 /* Emit code for the SET_GOT patterns. */
9270 const char *
9271 output_set_got (rtx dest, rtx label)
9273 rtx xops[3];
9275 xops[0] = dest;
9277 if (TARGET_VXWORKS_RTP && flag_pic)
9279 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9280 xops[2] = gen_rtx_MEM (Pmode,
9281 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9282 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9284 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9285 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9286 an unadorned address. */
9287 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9288 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9289 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9290 return "";
9293 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9295 if (!flag_pic)
9297 if (TARGET_MACHO)
9298 /* We don't need a pic base, we're not producing pic. */
9299 gcc_unreachable ();
9301 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9302 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9303 targetm.asm_out.internal_label (asm_out_file, "L",
9304 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9306 else
9308 char name[32];
9309 get_pc_thunk_name (name, REGNO (dest));
9310 pic_labels_used |= 1 << REGNO (dest);
9312 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9313 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9314 output_asm_insn ("call\t%X2", xops);
9316 #if TARGET_MACHO
9317 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9318 This is what will be referenced by the Mach-O PIC subsystem. */
9319 if (machopic_should_output_picbase_label () || !label)
9320 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9322 /* When we are restoring the pic base at the site of a nonlocal label,
9323 and we decided to emit the pic base above, we will still output a
9324 local label used for calculating the correction offset (even though
9325 the offset will be 0 in that case). */
9326 if (label)
9327 targetm.asm_out.internal_label (asm_out_file, "L",
9328 CODE_LABEL_NUMBER (label));
9329 #endif
9332 if (!TARGET_MACHO)
9333 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9335 return "";
9338 /* Generate a "push" pattern for input ARG. */
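/* Besides building the SET rtx, this also accounts for the push in the
   CFA and stack-offset bookkeeping in cfun->machine->fs. */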
9340 static rtx
9341 gen_push (rtx arg)
9343 struct machine_function *m = cfun->machine;
9345 if (m->fs.cfa_reg == stack_pointer_rtx)
9346 m->fs.cfa_offset += UNITS_PER_WORD;
9347 m->fs.sp_offset += UNITS_PER_WORD;
9349 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9350 arg = gen_rtx_REG (word_mode, REGNO (arg));
9352 return gen_rtx_SET (VOIDmode,
9353 gen_rtx_MEM (word_mode,
9354 gen_rtx_PRE_DEC (Pmode,
9355 stack_pointer_rtx)),
9356 arg);
9359 /* Generate a "pop" pattern for input ARG. */
9361 static rtx
9362 gen_pop (rtx arg)
9364 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9365 arg = gen_rtx_REG (word_mode, REGNO (arg));
9367 return gen_rtx_SET (VOIDmode,
9368 arg,
9369 gen_rtx_MEM (word_mode,
9370 gen_rtx_POST_INC (Pmode,
9371 stack_pointer_rtx)));
9374 /* Return >= 0 if there is an unused call-clobbered register available
9375 for the entire function. */
9377 static unsigned int
9378 ix86_select_alt_pic_regnum (void)
9380 if (crtl->is_leaf
9381 && !crtl->profile
9382 && !ix86_current_function_calls_tls_descriptor)
9384 int i, drap;
9385 /* Can't use the same register for both PIC and DRAP. */
9386 if (crtl->drap_reg)
9387 drap = REGNO (crtl->drap_reg);
9388 else
9389 drap = -1;
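/* Hard registers 0..2 are %eax, %edx and %ecx, the call-clobbered
   candidates considered here. */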
9390 for (i = 2; i >= 0; --i)
9391 if (i != drap && !df_regs_ever_live_p (i))
9392 return i;
9395 return INVALID_REGNUM;
9398 /* Return TRUE if we need to save REGNO. */
9400 static bool
9401 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9403 if (pic_offset_table_rtx
9404 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9405 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9406 || crtl->profile
9407 || crtl->calls_eh_return
9408 || crtl->uses_const_pool
9409 || cfun->has_nonlocal_label))
9410 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9412 if (crtl->calls_eh_return && maybe_eh_return)
9414 unsigned i;
9415 for (i = 0; ; i++)
9417 unsigned test = EH_RETURN_DATA_REGNO (i);
9418 if (test == INVALID_REGNUM)
9419 break;
9420 if (test == regno)
9421 return true;
9425 if (crtl->drap_reg
9426 && regno == REGNO (crtl->drap_reg)
9427 && !cfun->machine->no_drap_save_restore)
9428 return true;
9430 return (df_regs_ever_live_p (regno)
9431 && !call_used_regs[regno]
9432 && !fixed_regs[regno]
9433 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9436 /* Return the number of saved general purpose registers. */
9438 static int
9439 ix86_nsaved_regs (void)
9441 int nregs = 0;
9442 int regno;
9444 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9445 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9446 nregs ++;
9447 return nregs;
9450 /* Return the number of saved SSE registers. */
9452 static int
9453 ix86_nsaved_sseregs (void)
9455 int nregs = 0;
9456 int regno;
9458 if (!TARGET_64BIT_MS_ABI)
9459 return 0;
9460 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9461 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9462 nregs ++;
9463 return nregs;
9466 /* Given FROM and TO register numbers, say whether this elimination is
9467 allowed. If stack alignment is needed, we can only replace argument
9468 pointer with hard frame pointer, or replace frame pointer with stack
9469 pointer. Otherwise, frame pointer elimination is automatically
9470 handled and all other eliminations are valid. */
9472 static bool
9473 ix86_can_eliminate (const int from, const int to)
9475 if (stack_realign_fp)
9476 return ((from == ARG_POINTER_REGNUM
9477 && to == HARD_FRAME_POINTER_REGNUM)
9478 || (from == FRAME_POINTER_REGNUM
9479 && to == STACK_POINTER_REGNUM));
9480 else
9481 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9484 /* Return the offset between two registers, one to be eliminated, and the other
9485 its replacement, at the start of a routine. */
9487 HOST_WIDE_INT
9488 ix86_initial_elimination_offset (int from, int to)
9490 struct ix86_frame frame;
9491 ix86_compute_frame_layout (&frame);
9493 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9494 return frame.hard_frame_pointer_offset;
9495 else if (from == FRAME_POINTER_REGNUM
9496 && to == HARD_FRAME_POINTER_REGNUM)
9497 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9498 else
9500 gcc_assert (to == STACK_POINTER_REGNUM);
9502 if (from == ARG_POINTER_REGNUM)
9503 return frame.stack_pointer_offset;
9505 gcc_assert (from == FRAME_POINTER_REGNUM);
9506 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9510 /* In a dynamically-aligned function, we can't know the offset from
9511 stack pointer to frame pointer, so we must ensure that setjmp
9512 eliminates fp against the hard fp (%ebp) rather than trying to
9513 index from %esp up to the top of the frame across a gap that is
9514 of unknown (at compile-time) size. */
9515 static rtx
9516 ix86_builtin_setjmp_frame_value (void)
9518 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9521 /* When using -fsplit-stack, the allocation routines set a field in
9522 the TCB to the bottom of the stack plus this much space, measured
9523 in bytes. */
9525 #define SPLIT_STACK_AVAILABLE 256
9527 /* Fill structure ix86_frame about frame of currently computed function. */
9529 static void
9530 ix86_compute_frame_layout (struct ix86_frame *frame)
9532 unsigned HOST_WIDE_INT stack_alignment_needed;
9533 HOST_WIDE_INT offset;
9534 unsigned HOST_WIDE_INT preferred_alignment;
9535 HOST_WIDE_INT size = get_frame_size ();
9536 HOST_WIDE_INT to_allocate;
9538 frame->nregs = ix86_nsaved_regs ();
9539 frame->nsseregs = ix86_nsaved_sseregs ();
9541 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9542 except for function prologues and leaf functions. */
9543 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9544 && (!crtl->is_leaf || cfun->calls_alloca != 0
9545 || ix86_current_function_calls_tls_descriptor))
9547 crtl->preferred_stack_boundary = 128;
9548 crtl->stack_alignment_needed = 128;
9550 /* preferred_stack_boundary is never updated for calls
9551 expanded from a tls descriptor. Update it here. We don't update it at
9552 expand time because, according to the comments before
9553 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
9554 away. */
9555 else if (ix86_current_function_calls_tls_descriptor
9556 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9558 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9559 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9560 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9563 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9564 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9566 gcc_assert (!size || stack_alignment_needed);
9567 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9568 gcc_assert (preferred_alignment <= stack_alignment_needed);
9570 /* For SEH we have to limit the amount of code movement into the prologue.
9571 At present we do this via a BLOCKAGE, at which point there's very little
9572 scheduling that can be done, which means that there's very little point
9573 in doing anything except PUSHs. */
9574 if (TARGET_SEH)
9575 cfun->machine->use_fast_prologue_epilogue = false;
9577 /* During reload iterations the number of saved registers can change.
9578 Recompute the value as needed. Do not recompute when the number of
9579 registers didn't change, as reload calls this function multiple times and
9580 does not expect the decision to change within a single iteration. */
9581 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9582 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9584 int count = frame->nregs;
9585 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9587 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9589 /* The fast prologue uses move instead of push to save registers. This
9590 is significantly longer, but also executes faster as modern hardware
9591 can execute the moves in parallel, but can't do that for push/pop.
9593 Be careful about choosing which prologue to emit: when the function takes
9594 many instructions to execute we may use the slow version, and likewise when
9595 the function is known to be outside a hot spot (this is known with
9596 feedback only). Weight the size of the function by the number of registers
9597 to save, as it is cheap to use one or two push instructions but very
9598 slow to use many of them. */
9599 if (count)
9600 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9601 if (node->frequency < NODE_FREQUENCY_NORMAL
9602 || (flag_branch_probabilities
9603 && node->frequency < NODE_FREQUENCY_HOT))
9604 cfun->machine->use_fast_prologue_epilogue = false;
9605 else
9606 cfun->machine->use_fast_prologue_epilogue
9607 = !expensive_function_p (count);
9610 frame->save_regs_using_mov
9611 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9612 /* If static stack checking is enabled and done with probes,
9613 the registers need to be saved before allocating the frame. */
9614 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
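/* The frame is now laid out from the top down: return address, optional
   pushed static chain, saved frame pointer, GP register save area, SSE
   register save area, va_arg register save area, local variables and
   finally the outgoing argument area. */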
9616 /* Skip return address. */
9617 offset = UNITS_PER_WORD;
9619 /* Skip pushed static chain. */
9620 if (ix86_static_chain_on_stack)
9621 offset += UNITS_PER_WORD;
9623 /* Skip saved base pointer. */
9624 if (frame_pointer_needed)
9625 offset += UNITS_PER_WORD;
9626 frame->hfp_save_offset = offset;
9628 /* The traditional frame pointer location is at the top of the frame. */
9629 frame->hard_frame_pointer_offset = offset;
9631 /* Register save area */
9632 offset += frame->nregs * UNITS_PER_WORD;
9633 frame->reg_save_offset = offset;
9635 /* On SEH target, registers are pushed just before the frame pointer
9636 location. */
9637 if (TARGET_SEH)
9638 frame->hard_frame_pointer_offset = offset;
9640 /* Align and set SSE register save area. */
9641 if (frame->nsseregs)
9643 /* The only ABI that has saved SSE registers (Win64) also has a
9644 16-byte aligned default stack, and thus we don't need to be
9645 within the re-aligned local stack frame to save them. */
9646 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9647 offset = (offset + 16 - 1) & -16;
9648 offset += frame->nsseregs * 16;
9650 frame->sse_reg_save_offset = offset;
9652 /* The re-aligned stack starts here. Values before this point are not
9653 directly comparable with values below this point. In order to make
9654 sure that no value happens to be the same before and after, force
9655 the alignment computation below to add a non-zero value. */
9656 if (stack_realign_fp)
9657 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9659 /* Va-arg area */
9660 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9661 offset += frame->va_arg_size;
9663 /* Align start of frame for local function. */
9664 if (stack_realign_fp
9665 || offset != frame->sse_reg_save_offset
9666 || size != 0
9667 || !crtl->is_leaf
9668 || cfun->calls_alloca
9669 || ix86_current_function_calls_tls_descriptor)
9670 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9672 /* Frame pointer points here. */
9673 frame->frame_pointer_offset = offset;
9675 offset += size;
9677 /* Add outgoing arguments area. Can be skipped if we eliminated
9678 all the function calls as dead code.
9679 Skipping is however impossible when function calls alloca. Alloca
9680 expander assumes that last crtl->outgoing_args_size
9681 of stack frame are unused. */
9682 if (ACCUMULATE_OUTGOING_ARGS
9683 && (!crtl->is_leaf || cfun->calls_alloca
9684 || ix86_current_function_calls_tls_descriptor))
9686 offset += crtl->outgoing_args_size;
9687 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9689 else
9690 frame->outgoing_arguments_size = 0;
9692 /* Align stack boundary. Only needed if we're calling another function
9693 or using alloca. */
9694 if (!crtl->is_leaf || cfun->calls_alloca
9695 || ix86_current_function_calls_tls_descriptor)
9696 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9698 /* We've reached end of stack frame. */
9699 frame->stack_pointer_offset = offset;
9701 /* Size prologue needs to allocate. */
9702 to_allocate = offset - frame->sse_reg_save_offset;
9704 if ((!to_allocate && frame->nregs <= 1)
9705 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9706 frame->save_regs_using_mov = false;
9708 if (ix86_using_red_zone ()
9709 && crtl->sp_is_unchanging
9710 && crtl->is_leaf
9711 && !ix86_current_function_calls_tls_descriptor)
9713 frame->red_zone_size = to_allocate;
9714 if (frame->save_regs_using_mov)
9715 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9716 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9717 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9719 else
9720 frame->red_zone_size = 0;
9721 frame->stack_pointer_offset -= frame->red_zone_size;
9723 /* The SEH frame pointer location is near the bottom of the frame.
9724 This is enforced by the fact that the difference between the
9725 stack pointer and the frame pointer is limited to 240 bytes in
9726 the unwind data structure. */
9727 if (TARGET_SEH)
9729 HOST_WIDE_INT diff;
9731 /* If we can leave the frame pointer where it is, do so. Also, returns
9732 the establisher frame for __builtin_frame_address (0). */
9733 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9734 if (diff <= SEH_MAX_FRAME_SIZE
9735 && (diff > 240 || (diff & 15) != 0)
9736 && !crtl->accesses_prior_frames)
9738 /* Ideally we'd determine what portion of the local stack frame
9739 (within the constraint of the lowest 240) is most heavily used.
9740 But without that complication, simply bias the frame pointer
9741 by 128 bytes so as to maximize the amount of the local stack
9742 frame that is addressable with 8-bit offsets. */
9743 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9748 /* This is semi-inlined memory_address_length, but simplified
9749 since we know that we're always dealing with reg+offset, and
9750 to avoid having to create and discard all that rtl. */
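/* The result is the number of displacement and SIB bytes needed to encode
   the reg+offset address beyond the ModRM byte itself; e.g. 8(%ebp) needs a
   single disp8 byte (1), while 4(%esp) needs a disp8 plus a SIB byte (2). */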
9752 static inline int
9753 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9755 int len = 4;
9757 if (offset == 0)
9759 /* EBP and R13 cannot be encoded without an offset. */
9760 len = (regno == BP_REG || regno == R13_REG);
9762 else if (IN_RANGE (offset, -128, 127))
9763 len = 1;
9765 /* ESP and R12 must be encoded with a SIB byte. */
9766 if (regno == SP_REG || regno == R12_REG)
9767 len++;
9769 return len;
9772 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9773 The valid base registers are taken from CFUN->MACHINE->FS. */
9775 static rtx
9776 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9778 const struct machine_function *m = cfun->machine;
9779 rtx base_reg = NULL;
9780 HOST_WIDE_INT base_offset = 0;
9782 if (m->use_fast_prologue_epilogue)
9784 /* Choose the base register most likely to allow the most scheduling
9785 opportunities. Generally FP is valid throughout the function,
9786 while DRAP must be reloaded within the epilogue. But choose either
9787 over the SP due to increased encoding size. */
9789 if (m->fs.fp_valid)
9791 base_reg = hard_frame_pointer_rtx;
9792 base_offset = m->fs.fp_offset - cfa_offset;
9794 else if (m->fs.drap_valid)
9796 base_reg = crtl->drap_reg;
9797 base_offset = 0 - cfa_offset;
9799 else if (m->fs.sp_valid)
9801 base_reg = stack_pointer_rtx;
9802 base_offset = m->fs.sp_offset - cfa_offset;
9805 else
9807 HOST_WIDE_INT toffset;
9808 int len = 16, tlen;
9810 /* Choose the base register with the smallest address encoding.
9811 With a tie, choose FP > DRAP > SP. */
9812 if (m->fs.sp_valid)
9814 base_reg = stack_pointer_rtx;
9815 base_offset = m->fs.sp_offset - cfa_offset;
9816 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9818 if (m->fs.drap_valid)
9820 toffset = 0 - cfa_offset;
9821 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9822 if (tlen <= len)
9824 base_reg = crtl->drap_reg;
9825 base_offset = toffset;
9826 len = tlen;
9829 if (m->fs.fp_valid)
9831 toffset = m->fs.fp_offset - cfa_offset;
9832 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9833 if (tlen <= len)
9835 base_reg = hard_frame_pointer_rtx;
9836 base_offset = toffset;
9837 len = tlen;
9841 gcc_assert (base_reg != NULL);
9843 return plus_constant (Pmode, base_reg, base_offset);
9846 /* Emit code to save registers in the prologue. */
9848 static void
9849 ix86_emit_save_regs (void)
9851 unsigned int regno;
9852 rtx insn;
9854 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9855 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9857 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9858 RTX_FRAME_RELATED_P (insn) = 1;
9862 /* Emit a single register save at CFA - CFA_OFFSET. */
9864 static void
9865 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9866 HOST_WIDE_INT cfa_offset)
9868 struct machine_function *m = cfun->machine;
9869 rtx reg = gen_rtx_REG (mode, regno);
9870 rtx mem, addr, base, insn;
9872 addr = choose_baseaddr (cfa_offset);
9873 mem = gen_frame_mem (mode, addr);
9875 /* For SSE saves, we need to indicate the 128-bit alignment. */
9876 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9878 insn = emit_move_insn (mem, reg);
9879 RTX_FRAME_RELATED_P (insn) = 1;
9881 base = addr;
9882 if (GET_CODE (base) == PLUS)
9883 base = XEXP (base, 0);
9884 gcc_checking_assert (REG_P (base));
9886 /* When saving registers into a re-aligned local stack frame, avoid
9887 any tricky guessing by dwarf2out. */
9888 if (m->fs.realigned)
9890 gcc_checking_assert (stack_realign_drap);
9892 if (regno == REGNO (crtl->drap_reg))
9894 /* A bit of a hack. We force the DRAP register to be saved in
9895 the re-aligned stack frame, which provides us with a copy
9896 of the CFA that will last past the prologue. Install it. */
9897 gcc_checking_assert (cfun->machine->fs.fp_valid);
9898 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9899 cfun->machine->fs.fp_offset - cfa_offset);
9900 mem = gen_rtx_MEM (mode, addr);
9901 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9903 else
9905 /* The frame pointer is a stable reference within the
9906 aligned frame. Use it. */
9907 gcc_checking_assert (cfun->machine->fs.fp_valid);
9908 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9909 cfun->machine->fs.fp_offset - cfa_offset);
9910 mem = gen_rtx_MEM (mode, addr);
9911 add_reg_note (insn, REG_CFA_EXPRESSION,
9912 gen_rtx_SET (VOIDmode, mem, reg));
9916 /* The memory may not be relative to the current CFA register,
9917 which means that we may need to generate a new pattern for
9918 use by the unwind info. */
9919 else if (base != m->fs.cfa_reg)
9921 addr = plus_constant (Pmode, m->fs.cfa_reg,
9922 m->fs.cfa_offset - cfa_offset);
9923 mem = gen_rtx_MEM (mode, addr);
9924 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9928 /* Emit code to save registers using MOV insns.
9929 First register is stored at CFA - CFA_OFFSET. */
9930 static void
9931 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9933 unsigned int regno;
9935 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9936 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9938 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9939 cfa_offset -= UNITS_PER_WORD;
9943 /* Emit code to save SSE registers using MOV insns.
9944 First register is stored at CFA - CFA_OFFSET. */
9945 static void
9946 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9948 unsigned int regno;
9950 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9951 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9953 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9954 cfa_offset -= 16;
9958 static GTY(()) rtx queued_cfa_restores;
9960 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9961 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9962 Don't add the note if the previously saved value will be left untouched
9963 within the stack red-zone till return, as unwinders can find the same value
9964 in the register and on the stack. */
9966 static void
9967 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9969 if (!crtl->shrink_wrapped
9970 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9971 return;
9973 if (insn)
9975 add_reg_note (insn, REG_CFA_RESTORE, reg);
9976 RTX_FRAME_RELATED_P (insn) = 1;
9978 else
9979 queued_cfa_restores
9980 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9983 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9985 static void
9986 ix86_add_queued_cfa_restore_notes (rtx insn)
9988 rtx last;
9989 if (!queued_cfa_restores)
9990 return;
9991 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9993 XEXP (last, 1) = REG_NOTES (insn);
9994 REG_NOTES (insn) = queued_cfa_restores;
9995 queued_cfa_restores = NULL_RTX;
9996 RTX_FRAME_RELATED_P (insn) = 1;
9999 /* Expand prologue or epilogue stack adjustment.
10000 The pattern exists to put a dependency on all ebp-based memory accesses.
10001 STYLE should be negative if instructions should be marked as frame related,
10002 zero if the %r11 register is live and cannot be freely used, and positive
10003 otherwise. */
10005 static void
10006 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10007 int style, bool set_cfa)
10009 struct machine_function *m = cfun->machine;
10010 rtx insn;
10011 bool add_frame_related_expr = false;
10013 if (Pmode == SImode)
10014 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10015 else if (x86_64_immediate_operand (offset, DImode))
10016 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10017 else
10019 rtx tmp;
10020 /* r11 is used by indirect sibcall return as well, set before the
10021 epilogue and used after the epilogue. */
10022 if (style)
10023 tmp = gen_rtx_REG (DImode, R11_REG);
10024 else
10026 gcc_assert (src != hard_frame_pointer_rtx
10027 && dest != hard_frame_pointer_rtx);
10028 tmp = hard_frame_pointer_rtx;
10030 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10031 if (style < 0)
10032 add_frame_related_expr = true;
10034 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10037 insn = emit_insn (insn);
10038 if (style >= 0)
10039 ix86_add_queued_cfa_restore_notes (insn);
10041 if (set_cfa)
10043 rtx r;
10045 gcc_assert (m->fs.cfa_reg == src);
10046 m->fs.cfa_offset += INTVAL (offset);
10047 m->fs.cfa_reg = dest;
10049 r = gen_rtx_PLUS (Pmode, src, offset);
10050 r = gen_rtx_SET (VOIDmode, dest, r);
10051 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10052 RTX_FRAME_RELATED_P (insn) = 1;
10054 else if (style < 0)
10056 RTX_FRAME_RELATED_P (insn) = 1;
10057 if (add_frame_related_expr)
10059 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10060 r = gen_rtx_SET (VOIDmode, dest, r);
10061 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10065 if (dest == stack_pointer_rtx)
10067 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10068 bool valid = m->fs.sp_valid;
10070 if (src == hard_frame_pointer_rtx)
10072 valid = m->fs.fp_valid;
10073 ooffset = m->fs.fp_offset;
10075 else if (src == crtl->drap_reg)
10077 valid = m->fs.drap_valid;
10078 ooffset = 0;
10080 else
10082 /* Else there are two possibilities: SP itself, which we set
10083 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10084 taken care of by hand along the eh_return path. */
10085 gcc_checking_assert (src == stack_pointer_rtx
10086 || offset == const0_rtx);
10089 m->fs.sp_offset = ooffset - INTVAL (offset);
10090 m->fs.sp_valid = valid;
10094 /* Find an available register to be used as the dynamic realign argument
10095 pointer register. Such a register will be written in the prologue and
10096 used at the beginning of the body, so it must not be
10097 1. a parameter passing register.
10098 2. the GOT pointer.
10099 We reuse the static-chain register if it is available. Otherwise, we
10100 use DI for i386 and R13 for x86-64. We chose R13 since it has
10101 shorter encoding.
10103 Return: the regno of chosen register. */
10105 static unsigned int
10106 find_drap_reg (void)
10108 tree decl = cfun->decl;
10110 if (TARGET_64BIT)
10112 /* Use R13 for a nested function or a function that needs a static chain.
10113 Since a function with a tail call may use any caller-saved
10114 registers in the epilogue, DRAP must not use a caller-saved
10115 register in such a case. */
10116 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10117 return R13_REG;
10119 return R10_REG;
10121 else
10123 /* Use DI for a nested function or a function that needs a static chain.
10124 Since a function with a tail call may use any caller-saved
10125 registers in the epilogue, DRAP must not use a caller-saved
10126 register in such a case. */
10127 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10128 return DI_REG;
10130 /* Reuse static chain register if it isn't used for parameter
10131 passing. */
10132 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10134 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10135 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10136 return CX_REG;
10138 return DI_REG;
10142 /* Return minimum incoming stack alignment. */
10144 static unsigned int
10145 ix86_minimum_incoming_stack_boundary (bool sibcall)
10147 unsigned int incoming_stack_boundary;
10149 /* Prefer the one specified at command line. */
10150 if (ix86_user_incoming_stack_boundary)
10151 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10152 /* In 32bit, use MIN_STACK_BOUNDARY for the incoming stack boundary
10153 when -mstackrealign is used, this isn't a sibcall check, and the
10154 estimated stack alignment is 128bit. */
10155 else if (!sibcall
10156 && !TARGET_64BIT
10157 && ix86_force_align_arg_pointer
10158 && crtl->stack_alignment_estimated == 128)
10159 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10160 else
10161 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10163 /* Incoming stack alignment can be changed on individual functions
10164 via force_align_arg_pointer attribute. We use the smallest
10165 incoming stack boundary. */
10166 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10167 && lookup_attribute (ix86_force_align_arg_pointer_string,
10168 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10169 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10171 /* The incoming stack frame has to be aligned at least at
10172 parm_stack_boundary. */
10173 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10174 incoming_stack_boundary = crtl->parm_stack_boundary;
10176 /* Stack at entrance of main is aligned by runtime. We use the
10177 smallest incoming stack boundary. */
10178 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10179 && DECL_NAME (current_function_decl)
10180 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10181 && DECL_FILE_SCOPE_P (current_function_decl))
10182 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10184 return incoming_stack_boundary;
10187 /* Update incoming stack boundary and estimated stack alignment. */
10189 static void
10190 ix86_update_stack_boundary (void)
10192 ix86_incoming_stack_boundary
10193 = ix86_minimum_incoming_stack_boundary (false);
10195 /* x86_64 vararg needs 16byte stack alignment for register save
10196 area. */
10197 if (TARGET_64BIT
10198 && cfun->stdarg
10199 && crtl->stack_alignment_estimated < 128)
10200 crtl->stack_alignment_estimated = 128;
10203 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10204 needed or an rtx for DRAP otherwise. */
10206 static rtx
10207 ix86_get_drap_rtx (void)
10209 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10210 crtl->need_drap = true;
10212 if (stack_realign_drap)
10214 /* Assign DRAP to vDRAP and return vDRAP. */
10215 unsigned int regno = find_drap_reg ();
10216 rtx drap_vreg;
10217 rtx arg_ptr;
10218 rtx seq, insn;
10220 arg_ptr = gen_rtx_REG (Pmode, regno);
10221 crtl->drap_reg = arg_ptr;
10223 start_sequence ();
10224 drap_vreg = copy_to_reg (arg_ptr);
10225 seq = get_insns ();
10226 end_sequence ();
10228 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10229 if (!optimize)
10231 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10232 RTX_FRAME_RELATED_P (insn) = 1;
10234 return drap_vreg;
10236 else
10237 return NULL;
10240 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10242 static rtx
10243 ix86_internal_arg_pointer (void)
10245 return virtual_incoming_args_rtx;
10248 struct scratch_reg {
10249 rtx reg;
10250 bool saved;
10253 /* Return a short-lived scratch register for use on function entry.
10254 In 32-bit mode, it is valid only after the registers are saved
10255 in the prologue. This register must be released by means of
10256 release_scratch_register_on_entry once it is dead. */
10258 static void
10259 get_scratch_register_on_entry (struct scratch_reg *sr)
10261 int regno;
10263 sr->saved = false;
10265 if (TARGET_64BIT)
10267 /* We always use R11 in 64-bit mode. */
10268 regno = R11_REG;
10270 else
10272 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10273 bool fastcall_p
10274 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10275 bool thiscall_p
10276 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10277 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10278 int regparm = ix86_function_regparm (fntype, decl);
10279 int drap_regno
10280 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10282 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10283 for the static chain register. */
10284 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10285 && drap_regno != AX_REG)
10286 regno = AX_REG;
10287 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10288 for the static chain register. */
10289 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10290 regno = AX_REG;
10291 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10292 regno = DX_REG;
10293 /* ecx is the static chain register. */
10294 else if (regparm < 3 && !fastcall_p && !thiscall_p
10295 && !static_chain_p
10296 && drap_regno != CX_REG)
10297 regno = CX_REG;
10298 else if (ix86_save_reg (BX_REG, true))
10299 regno = BX_REG;
10300 /* esi is the static chain register. */
10301 else if (!(regparm == 3 && static_chain_p)
10302 && ix86_save_reg (SI_REG, true))
10303 regno = SI_REG;
10304 else if (ix86_save_reg (DI_REG, true))
10305 regno = DI_REG;
10306 else
10308 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10309 sr->saved = true;
10313 sr->reg = gen_rtx_REG (Pmode, regno);
10314 if (sr->saved)
10316 rtx insn = emit_insn (gen_push (sr->reg));
10317 RTX_FRAME_RELATED_P (insn) = 1;
10321 /* Release a scratch register obtained from the preceding function. */
10323 static void
10324 release_scratch_register_on_entry (struct scratch_reg *sr)
10326 if (sr->saved)
10328 struct machine_function *m = cfun->machine;
10329 rtx x, insn = emit_insn (gen_pop (sr->reg));
10331 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10332 RTX_FRAME_RELATED_P (insn) = 1;
10333 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10334 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10335 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10336 m->fs.sp_offset -= UNITS_PER_WORD;
10340 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10342 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
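/* The net effect is to lower the stack pointer by exactly SIZE bytes while
   touching the stack every PROBE_INTERVAL bytes; the extra PROBE_INTERVAL
   + dope subtracted up front is given back by the final adjustment. */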
10344 static void
10345 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10347 /* We skip the probe for the first interval + a small dope of 4 words and
10348 probe that many bytes past the specified size to maintain a protection
10349 area at the bottom of the stack. */
10350 const int dope = 4 * UNITS_PER_WORD;
10351 rtx size_rtx = GEN_INT (size), last;
10353 /* See if we have a constant small number of probes to generate. If so,
10354 that's the easy case. The run-time loop is made up of 11 insns in the
10355 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10356 for n # of intervals. */
10357 if (size <= 5 * PROBE_INTERVAL)
10359 HOST_WIDE_INT i, adjust;
10360 bool first_probe = true;
10362 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10363 values of N from 1 until it exceeds SIZE. If only one probe is
10364 needed, this will not generate any code. Then adjust and probe
10365 to PROBE_INTERVAL + SIZE. */
10366 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10368 if (first_probe)
10370 adjust = 2 * PROBE_INTERVAL + dope;
10371 first_probe = false;
10373 else
10374 adjust = PROBE_INTERVAL;
10376 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10377 plus_constant (Pmode, stack_pointer_rtx,
10378 -adjust)));
10379 emit_stack_probe (stack_pointer_rtx);
10382 if (first_probe)
10383 adjust = size + PROBE_INTERVAL + dope;
10384 else
10385 adjust = size + PROBE_INTERVAL - i;
10387 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10388 plus_constant (Pmode, stack_pointer_rtx,
10389 -adjust)));
10390 emit_stack_probe (stack_pointer_rtx);
10392 /* Adjust back to account for the additional first interval. */
10393 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10394 plus_constant (Pmode, stack_pointer_rtx,
10395 PROBE_INTERVAL + dope)));
10398 /* Otherwise, do the same as above, but in a loop. Note that we must be
10399 extra careful with variables wrapping around because we might be at
10400 the very top (or the very bottom) of the address space and we have
10401 to be able to handle this case properly; in particular, we use an
10402 equality test for the loop condition. */
10403 else
10405 HOST_WIDE_INT rounded_size;
10406 struct scratch_reg sr;
10408 get_scratch_register_on_entry (&sr);
10411 /* Step 1: round SIZE to the previous multiple of the interval. */
10413 rounded_size = size & -PROBE_INTERVAL;
10416 /* Step 2: compute initial and final value of the loop counter. */
10418 /* SP = SP_0 + PROBE_INTERVAL. */
10419 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10420 plus_constant (Pmode, stack_pointer_rtx,
10421 - (PROBE_INTERVAL + dope))));
10423 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10424 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10425 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10426 gen_rtx_PLUS (Pmode, sr.reg,
10427 stack_pointer_rtx)));
10430 /* Step 3: the loop
10432 while (SP != LAST_ADDR)
10434 SP = SP + PROBE_INTERVAL
10435 probe at SP
10438 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10439 values of N from 1 until it is equal to ROUNDED_SIZE. */
10441 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10444 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10445 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10447 if (size != rounded_size)
10449 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10450 plus_constant (Pmode, stack_pointer_rtx,
10451 rounded_size - size)));
10452 emit_stack_probe (stack_pointer_rtx);
10455 /* Adjust back to account for the additional first interval. */
10456 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10457 plus_constant (Pmode, stack_pointer_rtx,
10458 PROBE_INTERVAL + dope)));
10460 release_scratch_register_on_entry (&sr);
10463 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10465 /* Even if the stack pointer isn't the CFA register, we need to correctly
10466 describe the adjustments made to it, in particular differentiate the
10467 frame-related ones from the frame-unrelated ones. */
10468 if (size > 0)
10470 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10471 XVECEXP (expr, 0, 0)
10472 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10473 plus_constant (Pmode, stack_pointer_rtx, -size));
10474 XVECEXP (expr, 0, 1)
10475 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10476 plus_constant (Pmode, stack_pointer_rtx,
10477 PROBE_INTERVAL + dope + size));
10478 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10479 RTX_FRAME_RELATED_P (last) = 1;
10481 cfun->machine->fs.sp_offset += size;
10484 /* Make sure nothing is scheduled before we are done. */
10485 emit_insn (gen_blockage ());
10488 /* Adjust the stack pointer up to REG while probing it. */
10490 const char *
10491 output_adjust_stack_and_probe (rtx reg)
10493 static int labelno = 0;
10494 char loop_lab[32], end_lab[32];
10495 rtx xops[2];
10497 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10498 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10500 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10502 /* Jump to END_LAB if SP == LAST_ADDR. */
10503 xops[0] = stack_pointer_rtx;
10504 xops[1] = reg;
10505 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10506 fputs ("\tje\t", asm_out_file);
10507 assemble_name_raw (asm_out_file, end_lab);
10508 fputc ('\n', asm_out_file);
10510 /* SP = SP + PROBE_INTERVAL. */
10511 xops[1] = GEN_INT (PROBE_INTERVAL);
10512 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10514 /* Probe at SP. */
10515 xops[1] = const0_rtx;
10516 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10518 fprintf (asm_out_file, "\tjmp\t");
10519 assemble_name_raw (asm_out_file, loop_lab);
10520 fputc ('\n', asm_out_file);
10522 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10524 return "";
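/* A rough sketch of the loop emitted above on x86-64, assuming the
   usual 4 KiB probe interval and %r11 as the scratch register that
   holds LAST_ADDR (the register name is purely illustrative):

   .LPSRL0:  cmpq  %r11, %rsp
             je    .LPSRE0
             subq  $4096, %rsp
             orq   $0, (%rsp)
             jmp   .LPSRL0
   .LPSRE0:
*/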
10527 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10528 inclusive. These are offsets from the current stack pointer. */
10530 static void
10531 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10533 /* See if we have a constant small number of probes to generate. If so,
10534 that's the easy case. The run-time loop is made up of 7 insns in the
10535 generic case while the compile-time loop is made up of n insns for n #
10536 of intervals. */
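/* For instance, assuming the usual 4 KiB probe interval and FIRST = F,
   a SIZE of 8 KiB emits just two probes below, at sp - (F + 4096) and
   at sp - (F + 8192), with no loop.  (Numbers are illustrative.) */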
10537 if (size <= 7 * PROBE_INTERVAL)
10539 HOST_WIDE_INT i;
10541 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10542 it exceeds SIZE. If only one probe is needed, this will not
10543 generate any code. Then probe at FIRST + SIZE. */
10544 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10545 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10546 -(first + i)));
10548 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10549 -(first + size)));
10552 /* Otherwise, do the same as above, but in a loop. Note that we must be
10553 extra careful with variables wrapping around because we might be at
10554 the very top (or the very bottom) of the address space and we have
10555 to be able to handle this case properly; in particular, we use an
10556 equality test for the loop condition. */
10557 else
10559 HOST_WIDE_INT rounded_size, last;
10560 struct scratch_reg sr;
10562 get_scratch_register_on_entry (&sr);
10565 /* Step 1: round SIZE to the previous multiple of the interval. */
10567 rounded_size = size & -PROBE_INTERVAL;
10570 /* Step 2: compute initial and final value of the loop counter. */
10572 /* TEST_OFFSET = FIRST. */
10573 emit_move_insn (sr.reg, GEN_INT (-first));
10575 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10576 last = first + rounded_size;
10579 /* Step 3: the loop
10581 while (TEST_ADDR != LAST_ADDR)
10583 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10584 probe at TEST_ADDR
10587 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10588 until it is equal to ROUNDED_SIZE. */
10590 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10593 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10594 that SIZE is equal to ROUNDED_SIZE. */
10596 if (size != rounded_size)
10597 emit_stack_probe (plus_constant (Pmode,
10598 gen_rtx_PLUS (Pmode,
10599 stack_pointer_rtx,
10600 sr.reg),
10601 rounded_size - size));
10603 release_scratch_register_on_entry (&sr);
10606 /* Make sure nothing is scheduled before we are done. */
10607 emit_insn (gen_blockage ());
10610 /* Probe a range of stack addresses from REG to END, inclusive. These are
10611 offsets from the current stack pointer. */
10613 const char *
10614 output_probe_stack_range (rtx reg, rtx end)
10616 static int labelno = 0;
10617 char loop_lab[32], end_lab[32];
10618 rtx xops[3];
10620 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10621 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10623 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10625 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10626 xops[0] = reg;
10627 xops[1] = end;
10628 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10629 fputs ("\tje\t", asm_out_file);
10630 assemble_name_raw (asm_out_file, end_lab);
10631 fputc ('\n', asm_out_file);
10633 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10634 xops[1] = GEN_INT (PROBE_INTERVAL);
10635 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10637 /* Probe at TEST_ADDR. */
10638 xops[0] = stack_pointer_rtx;
10639 xops[1] = reg;
10640 xops[2] = const0_rtx;
10641 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10643 fprintf (asm_out_file, "\tjmp\t");
10644 assemble_name_raw (asm_out_file, loop_lab);
10645 fputc ('\n', asm_out_file);
10647 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10649 return "";
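/* A rough sketch of the loop emitted above on x86-64, assuming the
   usual 4 KiB probe interval, %r11 as the scratch register holding the
   negated probe offset and $-LAST as the END operand (register name and
   constants are purely illustrative):

   .LPSRL1:  cmpq  $-LAST, %r11
             je    .LPSRE1
             subq  $4096, %r11
             orq   $0, (%rsp,%r11)
             jmp   .LPSRL1
   .LPSRE1:
*/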
10652 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10653 to be generated in correct form. */
10654 static void
10655 ix86_finalize_stack_realign_flags (void)
10657 /* Check if stack realign is really needed after reload, and
10658 store the result in cfun. */
10659 unsigned int incoming_stack_boundary
10660 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10661 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10662 unsigned int stack_realign = (incoming_stack_boundary
10663 < (crtl->is_leaf
10664 ? crtl->max_used_stack_slot_alignment
10665 : crtl->stack_alignment_needed));
10667 if (crtl->stack_realign_finalized)
10669 /* After stack_realign_needed is finalized, we can no longer
10670 change it. */
10671 gcc_assert (crtl->stack_realign_needed == stack_realign);
10672 return;
10675 /* If the only reason for frame_pointer_needed is that we conservatively
10676 assumed stack realignment might be needed, but in the end nothing that
10677 needed the stack alignment had been spilled, clear frame_pointer_needed
10678 and say we don't need stack realignment. */
10679 if (stack_realign
10680 && frame_pointer_needed
10681 && crtl->is_leaf
10682 && flag_omit_frame_pointer
10683 && crtl->sp_is_unchanging
10684 && !ix86_current_function_calls_tls_descriptor
10685 && !crtl->accesses_prior_frames
10686 && !cfun->calls_alloca
10687 && !crtl->calls_eh_return
10688 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10689 && !ix86_frame_pointer_required ()
10690 && get_frame_size () == 0
10691 && ix86_nsaved_sseregs () == 0
10692 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10694 HARD_REG_SET set_up_by_prologue, prologue_used;
10695 basic_block bb;
10697 CLEAR_HARD_REG_SET (prologue_used);
10698 CLEAR_HARD_REG_SET (set_up_by_prologue);
10699 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10700 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10701 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10702 HARD_FRAME_POINTER_REGNUM);
10703 FOR_EACH_BB_FN (bb, cfun)
10705 rtx insn;
10706 FOR_BB_INSNS (bb, insn)
10707 if (NONDEBUG_INSN_P (insn)
10708 && requires_stack_frame_p (insn, prologue_used,
10709 set_up_by_prologue))
10711 crtl->stack_realign_needed = stack_realign;
10712 crtl->stack_realign_finalized = true;
10713 return;
10717 /* If drap has been set, but it actually isn't live at the start
10718 of the function, there is no reason to set it up. */
10719 if (crtl->drap_reg)
10721 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10722 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10724 crtl->drap_reg = NULL_RTX;
10725 crtl->need_drap = false;
10728 else
10729 cfun->machine->no_drap_save_restore = true;
10731 frame_pointer_needed = false;
10732 stack_realign = false;
10733 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10734 crtl->stack_alignment_needed = incoming_stack_boundary;
10735 crtl->stack_alignment_estimated = incoming_stack_boundary;
10736 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10737 crtl->preferred_stack_boundary = incoming_stack_boundary;
10738 df_finish_pass (true);
10739 df_scan_alloc (NULL);
10740 df_scan_blocks ();
10741 df_compute_regs_ever_live (true);
10742 df_analyze ();
10745 crtl->stack_realign_needed = stack_realign;
10746 crtl->stack_realign_finalized = true;
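/* In short, the pass above either confirms the conservative choice made
   before reload (stack realignment and/or a frame pointer) or, for leaf
   functions where nothing turned out to need the extra alignment, drops
   both and rescans the dataflow information so that later passes see
   the simpler frame.  This is only a summary of the code above. */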
10749 /* Expand the prologue into a bunch of separate insns. */
10751 void
10752 ix86_expand_prologue (void)
10754 struct machine_function *m = cfun->machine;
10755 rtx insn, t;
10756 bool pic_reg_used;
10757 struct ix86_frame frame;
10758 HOST_WIDE_INT allocate;
10759 bool int_registers_saved;
10760 bool sse_registers_saved;
10762 ix86_finalize_stack_realign_flags ();
10764 /* DRAP should not coexist with stack_realign_fp */
10765 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10767 memset (&m->fs, 0, sizeof (m->fs));
10769 /* Initialize CFA state for before the prologue. */
10770 m->fs.cfa_reg = stack_pointer_rtx;
10771 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10773 /* Track SP offset to the CFA. We continue tracking this after we've
10774 swapped the CFA register away from SP. In the case of re-alignment
10775 this is fudged; we're interested in offsets within the local frame. */
10776 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10777 m->fs.sp_valid = true;
10779 ix86_compute_frame_layout (&frame);
10781 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10783 /* We should have already generated an error for any use of
10784 ms_hook on a nested function. */
10785 gcc_checking_assert (!ix86_static_chain_on_stack);
10787 /* Check if profiling is active and we shall use the profiling-before-
10788 prologue variant. If so, issue a sorry. */
10789 if (crtl->profile && flag_fentry != 0)
10790 sorry ("ms_hook_prologue attribute isn%'t compatible "
10791 "with -mfentry for 32-bit");
10793 /* In ix86_asm_output_function_label we emitted:
10794 8b ff movl.s %edi,%edi
10795 55 push %ebp
10796 8b ec movl.s %esp,%ebp
10798 This matches the hookable function prologue in Win32 API
10799 functions in Microsoft Windows XP Service Pack 2 and newer.
10800 Wine uses this to enable Windows apps to hook the Win32 API
10801 functions provided by Wine.
10803 What that means is that we've already set up the frame pointer. */
10805 if (frame_pointer_needed
10806 && !(crtl->drap_reg && crtl->stack_realign_needed))
10808 rtx push, mov;
10810 /* We've decided to use the frame pointer already set up.
10811 Describe this to the unwinder by pretending that both
10812 push and mov insns happen right here.
10814 Putting the unwind info here at the end of the ms_hook
10815 is done so that we can make absolutely certain we get
10816 the required byte sequence at the start of the function,
10817 rather than relying on an assembler that can produce
10818 the exact encoding required.
10820 However it does mean (in the unpatched case) that we have
10821 a 1 insn window where the asynchronous unwind info is
10822 incorrect. However, if we placed the unwind info at
10823 its correct location we would have incorrect unwind info
10824 in the patched case. Which is probably all moot since
10825 I don't expect Wine generates dwarf2 unwind info for the
10826 system libraries that use this feature. */
10828 insn = emit_insn (gen_blockage ());
10830 push = gen_push (hard_frame_pointer_rtx);
10831 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10832 stack_pointer_rtx);
10833 RTX_FRAME_RELATED_P (push) = 1;
10834 RTX_FRAME_RELATED_P (mov) = 1;
10836 RTX_FRAME_RELATED_P (insn) = 1;
10837 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10838 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10840 /* Note that gen_push incremented m->fs.cfa_offset, even
10841 though we didn't emit the push insn here. */
10842 m->fs.cfa_reg = hard_frame_pointer_rtx;
10843 m->fs.fp_offset = m->fs.cfa_offset;
10844 m->fs.fp_valid = true;
10846 else
10848 /* The frame pointer is not needed so pop %ebp again.
10849 This leaves us with a pristine state. */
10850 emit_insn (gen_pop (hard_frame_pointer_rtx));
10854 /* The first insn of a function that accepts its static chain on the
10855 stack is to push the register that would be filled in by a direct
10856 call. This insn will be skipped by the trampoline. */
10857 else if (ix86_static_chain_on_stack)
10859 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10860 emit_insn (gen_blockage ());
10862 /* We don't want to interpret this push insn as a register save,
10863 only as a stack adjustment. The real copy of the register as
10864 a save will be done later, if needed. */
10865 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10866 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10867 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10868 RTX_FRAME_RELATED_P (insn) = 1;
10871 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10872 DRAP is needed and stack realignment is really needed after reload. */
10873 if (stack_realign_drap)
10875 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10877 /* Only need to push parameter pointer reg if it is caller saved. */
10878 if (!call_used_regs[REGNO (crtl->drap_reg)])
10880 /* Push arg pointer reg */
10881 insn = emit_insn (gen_push (crtl->drap_reg));
10882 RTX_FRAME_RELATED_P (insn) = 1;
10885 /* Grab the argument pointer. */
10886 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10887 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10888 RTX_FRAME_RELATED_P (insn) = 1;
10889 m->fs.cfa_reg = crtl->drap_reg;
10890 m->fs.cfa_offset = 0;
10892 /* Align the stack. */
10893 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10894 stack_pointer_rtx,
10895 GEN_INT (-align_bytes)));
10896 RTX_FRAME_RELATED_P (insn) = 1;
10898 /* Replicate the return address on the stack so that return
10899 address can be reached via (argp - 1) slot. This is needed
10900 to implement macro RETURN_ADDR_RTX and intrinsic function
10901 expand_builtin_return_addr etc. */
10902 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10903 t = gen_frame_mem (word_mode, t);
10904 insn = emit_insn (gen_push (t));
10905 RTX_FRAME_RELATED_P (insn) = 1;
10907 /* For the purposes of frame and register save area addressing,
10908 we've started over with a new frame. */
10909 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10910 m->fs.realigned = true;
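/* At this point the stack has been re-aligned: the DRAP register points
   at the incoming argument area (the original CFA), the stack pointer
   is aligned to stack_alignment_needed, and the word just pushed is a
   copy of the return address so that the (argp - 1) slot keeps working.
   Roughly, ignoring the optional save of the DRAP register itself:

   drap ->  stack arguments passed by the caller
            original return address
            ... alignment padding ...
   sp   ->  copy of the return address
*/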
10913 int_registers_saved = (frame.nregs == 0);
10914 sse_registers_saved = (frame.nsseregs == 0);
10916 if (frame_pointer_needed && !m->fs.fp_valid)
10918 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10919 slower on all targets. Also sdb doesn't like it. */
10920 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10921 RTX_FRAME_RELATED_P (insn) = 1;
10923 /* Push registers now, before setting the frame pointer
10924 on SEH target. */
10925 if (!int_registers_saved
10926 && TARGET_SEH
10927 && !frame.save_regs_using_mov)
10929 ix86_emit_save_regs ();
10930 int_registers_saved = true;
10931 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10934 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10936 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10937 RTX_FRAME_RELATED_P (insn) = 1;
10939 if (m->fs.cfa_reg == stack_pointer_rtx)
10940 m->fs.cfa_reg = hard_frame_pointer_rtx;
10941 m->fs.fp_offset = m->fs.sp_offset;
10942 m->fs.fp_valid = true;
10946 if (!int_registers_saved)
10948 /* If saving registers via PUSH, do so now. */
10949 if (!frame.save_regs_using_mov)
10951 ix86_emit_save_regs ();
10952 int_registers_saved = true;
10953 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10956 /* When using the red zone we may start register saving before allocating
10957 the stack frame, saving one cycle of the prologue. However, avoid
10958 doing this if we have to probe the stack; at least on x86_64 the
10959 stack probe can turn into a call that clobbers a red zone location. */
10960 else if (ix86_using_red_zone ()
10961 && (! TARGET_STACK_PROBE
10962 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10964 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10965 int_registers_saved = true;
10969 if (stack_realign_fp)
10971 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10972 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10974 /* The computation of the size of the re-aligned stack frame means
10975 that we must allocate the size of the register save area before
10976 performing the actual alignment. Otherwise we cannot guarantee
10977 that there's enough storage above the realignment point. */
10978 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10979 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10980 GEN_INT (m->fs.sp_offset
10981 - frame.sse_reg_save_offset),
10982 -1, false);
10984 /* Align the stack. */
10985 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10986 stack_pointer_rtx,
10987 GEN_INT (-align_bytes)));
10989 /* For the purposes of register save area addressing, the stack
10990 pointer is no longer valid. As for the value of sp_offset,
10991 see ix86_compute_frame_layout, which we need to match in order
10992 to pass verification of stack_pointer_offset at the end. */
10993 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10994 m->fs.sp_valid = false;
10997 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10999 if (flag_stack_usage_info)
11001 /* We start to count from ARG_POINTER. */
11002 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11004 /* If it was realigned, take into account the fake frame. */
11005 if (stack_realign_drap)
11007 if (ix86_static_chain_on_stack)
11008 stack_size += UNITS_PER_WORD;
11010 if (!call_used_regs[REGNO (crtl->drap_reg)])
11011 stack_size += UNITS_PER_WORD;
11013 /* This over-estimates by 1 minimal-stack-alignment-unit but
11014 mitigates that by counting in the new return address slot. */
11015 current_function_dynamic_stack_size
11016 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11019 current_function_static_stack_size = stack_size;
11022 /* On the SEH target with a very large frame size, allocate an area to
11023 save SSE registers (as the very large allocation won't be described). */
11024 if (TARGET_SEH
11025 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11026 && !sse_registers_saved)
11028 HOST_WIDE_INT sse_size =
11029 frame.sse_reg_save_offset - frame.reg_save_offset;
11031 gcc_assert (int_registers_saved);
11033 /* No need to do stack checking as the area will be immediately
11034 written. */
11035 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11036 GEN_INT (-sse_size), -1,
11037 m->fs.cfa_reg == stack_pointer_rtx);
11038 allocate -= sse_size;
11039 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11040 sse_registers_saved = true;
11043 /* The stack has already been decremented by the instruction calling us
11044 so probe if the size is non-negative to preserve the protection area. */
11045 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11047 /* We expect the registers to be saved when probes are used. */
11048 gcc_assert (int_registers_saved);
11050 if (STACK_CHECK_MOVING_SP)
11052 if (!(crtl->is_leaf && !cfun->calls_alloca
11053 && allocate <= PROBE_INTERVAL))
11055 ix86_adjust_stack_and_probe (allocate);
11056 allocate = 0;
11059 else
11061 HOST_WIDE_INT size = allocate;
11063 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11064 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11066 if (TARGET_STACK_PROBE)
11068 if (crtl->is_leaf && !cfun->calls_alloca)
11070 if (size > PROBE_INTERVAL)
11071 ix86_emit_probe_stack_range (0, size);
11073 else
11074 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11076 else
11078 if (crtl->is_leaf && !cfun->calls_alloca)
11080 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11081 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11082 size - STACK_CHECK_PROTECT);
11084 else
11085 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11090 if (allocate == 0)
11092 else if (!ix86_target_stack_probe ()
11093 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11095 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11096 GEN_INT (-allocate), -1,
11097 m->fs.cfa_reg == stack_pointer_rtx);
11099 else
11101 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11102 rtx r10 = NULL;
11103 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11104 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11105 bool eax_live = ix86_eax_live_at_start_p ();
11106 bool r10_live = false;
11108 if (TARGET_64BIT)
11109 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11111 if (eax_live)
11113 insn = emit_insn (gen_push (eax));
11114 allocate -= UNITS_PER_WORD;
11115 /* Note that SEH directives need to continue tracking the stack
11116 pointer even after the frame pointer has been set up. */
11117 if (sp_is_cfa_reg || TARGET_SEH)
11119 if (sp_is_cfa_reg)
11120 m->fs.cfa_offset += UNITS_PER_WORD;
11121 RTX_FRAME_RELATED_P (insn) = 1;
11125 if (r10_live)
11127 r10 = gen_rtx_REG (Pmode, R10_REG);
11128 insn = emit_insn (gen_push (r10));
11129 allocate -= UNITS_PER_WORD;
11130 if (sp_is_cfa_reg || TARGET_SEH)
11132 if (sp_is_cfa_reg)
11133 m->fs.cfa_offset += UNITS_PER_WORD;
11134 RTX_FRAME_RELATED_P (insn) = 1;
11138 emit_move_insn (eax, GEN_INT (allocate));
11139 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11141 /* Use the fact that AX still contains ALLOCATE. */
11142 adjust_stack_insn = (Pmode == DImode
11143 ? gen_pro_epilogue_adjust_stack_di_sub
11144 : gen_pro_epilogue_adjust_stack_si_sub);
11146 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11147 stack_pointer_rtx, eax));
11149 if (sp_is_cfa_reg || TARGET_SEH)
11151 if (sp_is_cfa_reg)
11152 m->fs.cfa_offset += allocate;
11153 RTX_FRAME_RELATED_P (insn) = 1;
11154 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11155 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11156 plus_constant (Pmode, stack_pointer_rtx,
11157 -allocate)));
11159 m->fs.sp_offset += allocate;
11161 /* Use stack_pointer_rtx for relative addressing so that code
11162 works for realigned stack, too. */
11163 if (r10_live && eax_live)
11165 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11166 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11167 gen_frame_mem (word_mode, t));
11168 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11169 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11170 gen_frame_mem (word_mode, t));
11172 else if (eax_live || r10_live)
11174 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11175 emit_move_insn (gen_rtx_REG (word_mode,
11176 (eax_live ? AX_REG : R10_REG)),
11177 gen_frame_mem (word_mode, t));
11180 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11182 /* If we haven't already set up the frame pointer, do so now. */
11183 if (frame_pointer_needed && !m->fs.fp_valid)
11185 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11186 GEN_INT (frame.stack_pointer_offset
11187 - frame.hard_frame_pointer_offset));
11188 insn = emit_insn (insn);
11189 RTX_FRAME_RELATED_P (insn) = 1;
11190 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11192 if (m->fs.cfa_reg == stack_pointer_rtx)
11193 m->fs.cfa_reg = hard_frame_pointer_rtx;
11194 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11195 m->fs.fp_valid = true;
11198 if (!int_registers_saved)
11199 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11200 if (!sse_registers_saved)
11201 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11203 pic_reg_used = false;
11204 /* We don't use the PIC register for the pe-coff target. */
11205 if (pic_offset_table_rtx
11206 && !TARGET_PECOFF
11207 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11208 || crtl->profile))
11210 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11212 if (alt_pic_reg_used != INVALID_REGNUM)
11213 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11215 pic_reg_used = true;
11218 if (pic_reg_used)
11220 if (TARGET_64BIT)
11222 if (ix86_cmodel == CM_LARGE_PIC)
11224 rtx label, tmp_reg;
11226 gcc_assert (Pmode == DImode);
11227 label = gen_label_rtx ();
11228 emit_label (label);
11229 LABEL_PRESERVE_P (label) = 1;
11230 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11231 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11232 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11233 label));
11234 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11235 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11236 pic_offset_table_rtx, tmp_reg));
11238 else
11239 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11241 else
11243 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11244 RTX_FRAME_RELATED_P (insn) = 1;
11245 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11249 /* In the pic_reg_used case, make sure that the got load isn't deleted
11250 when mcount needs it. Blockage to avoid call movement across mcount
11251 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11252 note. */
11253 if (crtl->profile && !flag_fentry && pic_reg_used)
11254 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11256 if (crtl->drap_reg && !crtl->stack_realign_needed)
11258 /* vDRAP is set up, but after reload it turns out stack realignment
11259 isn't necessary; here we emit the prologue to set up DRAP
11260 without the stack realignment adjustment. */
11261 t = choose_baseaddr (0);
11262 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11265 /* Prevent instructions from being scheduled into register save push
11266 sequence when access to the redzone area is done through frame pointer.
11267 The offset between the frame pointer and the stack pointer is calculated
11268 relative to the value of the stack pointer at the end of the function
11269 prologue, and moving instructions that access redzone area via frame
11270 pointer inside push sequence violates this assumption. */
11271 if (frame_pointer_needed && frame.red_zone_size)
11272 emit_insn (gen_memory_blockage ());
11274 /* Emit cld instruction if stringops are used in the function. */
11275 if (TARGET_CLD && ix86_current_function_needs_cld)
11276 emit_insn (gen_cld ());
11278 /* SEH requires that the prologue end within 256 bytes of the start of
11279 the function. Prevent instruction schedules that would extend that.
11280 Further, prevent alloca modifications to the stack pointer from being
11281 combined with prologue modifications. */
11282 if (TARGET_SEH)
11283 emit_insn (gen_prologue_use (stack_pointer_rtx));
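/* To summarize, the prologue above emits, in order and only as needed:
   the ms_hook or static-chain push, the DRAP setup and re-alignment,
   the frame pointer push and set, integer register saves (push or mov),
   the frame-pointer-based re-alignment, the stack allocation (with
   probing when stack checking is enabled), the SSE register saves, and
   finally the PIC register setup.  This is only a rough road map of the
   code above, not an exact ordering for every configuration. */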
11286 /* Emit code to restore REG using a POP insn. */
11288 static void
11289 ix86_emit_restore_reg_using_pop (rtx reg)
11291 struct machine_function *m = cfun->machine;
11292 rtx insn = emit_insn (gen_pop (reg));
11294 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11295 m->fs.sp_offset -= UNITS_PER_WORD;
11297 if (m->fs.cfa_reg == crtl->drap_reg
11298 && REGNO (reg) == REGNO (crtl->drap_reg))
11300 /* Previously we'd represented the CFA as an expression
11301 like *(%ebp - 8). We've just popped that value from
11302 the stack, which means we need to reset the CFA to
11303 the drap register. This will remain until we restore
11304 the stack pointer. */
11305 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11306 RTX_FRAME_RELATED_P (insn) = 1;
11308 /* This means that the DRAP register is valid for addressing too. */
11309 m->fs.drap_valid = true;
11310 return;
11313 if (m->fs.cfa_reg == stack_pointer_rtx)
11315 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11316 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11317 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11318 RTX_FRAME_RELATED_P (insn) = 1;
11320 m->fs.cfa_offset -= UNITS_PER_WORD;
11323 /* When the frame pointer is the CFA, and we pop it, we are
11324 swapping back to the stack pointer as the CFA. This happens
11325 for stack frames that don't allocate other data, so we assume
11326 the stack pointer is now pointing at the return address, i.e.
11327 the function entry state, which makes the offset one word. */
11328 if (reg == hard_frame_pointer_rtx)
11330 m->fs.fp_valid = false;
11331 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11333 m->fs.cfa_reg = stack_pointer_rtx;
11334 m->fs.cfa_offset -= UNITS_PER_WORD;
11336 add_reg_note (insn, REG_CFA_DEF_CFA,
11337 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11338 GEN_INT (m->fs.cfa_offset)));
11339 RTX_FRAME_RELATED_P (insn) = 1;
11344 /* Emit code to restore saved registers using POP insns. */
11346 static void
11347 ix86_emit_restore_regs_using_pop (void)
11349 unsigned int regno;
11351 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11352 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11353 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11356 /* Emit code and notes for the LEAVE instruction. */
11358 static void
11359 ix86_emit_leave (void)
11361 struct machine_function *m = cfun->machine;
11362 rtx insn = emit_insn (ix86_gen_leave ());
11364 ix86_add_queued_cfa_restore_notes (insn);
11366 gcc_assert (m->fs.fp_valid);
11367 m->fs.sp_valid = true;
11368 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11369 m->fs.fp_valid = false;
11371 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11373 m->fs.cfa_reg = stack_pointer_rtx;
11374 m->fs.cfa_offset = m->fs.sp_offset;
11376 add_reg_note (insn, REG_CFA_DEF_CFA,
11377 plus_constant (Pmode, stack_pointer_rtx,
11378 m->fs.sp_offset));
11379 RTX_FRAME_RELATED_P (insn) = 1;
11381 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11382 m->fs.fp_offset);
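/* Recall that leave is equivalent to mov %ebp, %esp followed by
   pop %ebp (mov %rbp, %rsp / pop %rbp in 64-bit mode), which is why the
   stack pointer becomes valid again at fp_offset - UNITS_PER_WORD and
   the frame pointer is marked invalid above. */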
11385 /* Emit code to restore saved registers using MOV insns.
11386 First register is restored from CFA - CFA_OFFSET. */
11387 static void
11388 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11389 bool maybe_eh_return)
11391 struct machine_function *m = cfun->machine;
11392 unsigned int regno;
11394 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11395 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11397 rtx reg = gen_rtx_REG (word_mode, regno);
11398 rtx insn, mem;
11400 mem = choose_baseaddr (cfa_offset);
11401 mem = gen_frame_mem (word_mode, mem);
11402 insn = emit_move_insn (reg, mem);
11404 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11406 /* Previously we'd represented the CFA as an expression
11407 like *(%ebp - 8). We've just popped that value from
11408 the stack, which means we need to reset the CFA to
11409 the drap register. This will remain until we restore
11410 the stack pointer. */
11411 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11412 RTX_FRAME_RELATED_P (insn) = 1;
11414 /* This means that the DRAP register is valid for addressing. */
11415 m->fs.drap_valid = true;
11417 else
11418 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11420 cfa_offset -= UNITS_PER_WORD;
11424 /* Emit code to restore saved registers using MOV insns.
11425 First register is restored from CFA - CFA_OFFSET. */
11426 static void
11427 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11428 bool maybe_eh_return)
11430 unsigned int regno;
11432 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11433 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11435 rtx reg = gen_rtx_REG (V4SFmode, regno);
11436 rtx mem;
11438 mem = choose_baseaddr (cfa_offset);
11439 mem = gen_rtx_MEM (V4SFmode, mem);
11440 set_mem_align (mem, 128);
11441 emit_move_insn (reg, mem);
11443 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11445 cfa_offset -= 16;
11449 /* Restore function stack, frame, and registers. */
11451 void
11452 ix86_expand_epilogue (int style)
11454 struct machine_function *m = cfun->machine;
11455 struct machine_frame_state frame_state_save = m->fs;
11456 struct ix86_frame frame;
11457 bool restore_regs_via_mov;
11458 bool using_drap;
11460 ix86_finalize_stack_realign_flags ();
11461 ix86_compute_frame_layout (&frame);
11463 m->fs.sp_valid = (!frame_pointer_needed
11464 || (crtl->sp_is_unchanging
11465 && !stack_realign_fp));
11466 gcc_assert (!m->fs.sp_valid
11467 || m->fs.sp_offset == frame.stack_pointer_offset);
11469 /* The FP must be valid if the frame pointer is present. */
11470 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11471 gcc_assert (!m->fs.fp_valid
11472 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11474 /* We must have *some* valid pointer to the stack frame. */
11475 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11477 /* The DRAP is never valid at this point. */
11478 gcc_assert (!m->fs.drap_valid);
11480 /* See the comment about red zone and frame
11481 pointer usage in ix86_expand_prologue. */
11482 if (frame_pointer_needed && frame.red_zone_size)
11483 emit_insn (gen_memory_blockage ());
11485 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11486 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11488 /* Determine the CFA offset of the end of the red-zone. */
11489 m->fs.red_zone_offset = 0;
11490 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11492 /* The red-zone begins below the return address. */
11493 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11495 /* When the register save area is in the aligned portion of
11496 the stack, determine the maximum runtime displacement that
11497 matches up with the aligned frame. */
11498 if (stack_realign_drap)
11499 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11500 + UNITS_PER_WORD);
11503 /* Special care must be taken for the normal return case of a function
11504 using eh_return: the eax and edx registers are marked as saved, but
11505 not restored along this path. Adjust the save location to match. */
11506 if (crtl->calls_eh_return && style != 2)
11507 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11509 /* EH_RETURN requires the use of moves to function properly. */
11510 if (crtl->calls_eh_return)
11511 restore_regs_via_mov = true;
11512 /* SEH requires the use of pops to identify the epilogue. */
11513 else if (TARGET_SEH)
11514 restore_regs_via_mov = false;
11515 /* If we're only restoring one register and sp is not valid, then
11516 use a move instruction to restore the register, since it's
11517 less work than reloading sp and popping the register. */
11518 else if (!m->fs.sp_valid && frame.nregs <= 1)
11519 restore_regs_via_mov = true;
11520 else if (TARGET_EPILOGUE_USING_MOVE
11521 && cfun->machine->use_fast_prologue_epilogue
11522 && (frame.nregs > 1
11523 || m->fs.sp_offset != frame.reg_save_offset))
11524 restore_regs_via_mov = true;
11525 else if (frame_pointer_needed
11526 && !frame.nregs
11527 && m->fs.sp_offset != frame.reg_save_offset)
11528 restore_regs_via_mov = true;
11529 else if (frame_pointer_needed
11530 && TARGET_USE_LEAVE
11531 && cfun->machine->use_fast_prologue_epilogue
11532 && frame.nregs == 1)
11533 restore_regs_via_mov = true;
11534 else
11535 restore_regs_via_mov = false;
11537 if (restore_regs_via_mov || frame.nsseregs)
11539 /* Ensure that the entire register save area is addressable via
11540 the stack pointer, if we will restore via sp. */
11541 if (TARGET_64BIT
11542 && m->fs.sp_offset > 0x7fffffff
11543 && !(m->fs.fp_valid || m->fs.drap_valid)
11544 && (frame.nsseregs + frame.nregs) != 0)
11546 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11547 GEN_INT (m->fs.sp_offset
11548 - frame.sse_reg_save_offset),
11549 style,
11550 m->fs.cfa_reg == stack_pointer_rtx);
11554 /* If there are any SSE registers to restore, then we have to do it
11555 via moves, since there's obviously no pop for SSE regs. */
11556 if (frame.nsseregs)
11557 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11558 style == 2);
11560 if (restore_regs_via_mov)
11562 rtx t;
11564 if (frame.nregs)
11565 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11567 /* eh_return epilogues need %ecx added to the stack pointer. */
11568 if (style == 2)
11570 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11572 /* Stack align doesn't work with eh_return. */
11573 gcc_assert (!stack_realign_drap);
11574 /* Neither do regparm nested functions. */
11575 gcc_assert (!ix86_static_chain_on_stack);
11577 if (frame_pointer_needed)
11579 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11580 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11581 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11583 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11584 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11586 /* Note that we use SA as a temporary CFA, as the return
11587 address is at the proper place relative to it. We
11588 pretend this happens at the FP restore insn because
11589 prior to this insn the FP would be stored at the wrong
11590 offset relative to SA, and after this insn we have no
11591 other reasonable register to use for the CFA. We don't
11592 bother resetting the CFA to the SP for the duration of
11593 the return insn. */
11594 add_reg_note (insn, REG_CFA_DEF_CFA,
11595 plus_constant (Pmode, sa, UNITS_PER_WORD));
11596 ix86_add_queued_cfa_restore_notes (insn);
11597 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11598 RTX_FRAME_RELATED_P (insn) = 1;
11600 m->fs.cfa_reg = sa;
11601 m->fs.cfa_offset = UNITS_PER_WORD;
11602 m->fs.fp_valid = false;
11604 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11605 const0_rtx, style, false);
11607 else
11609 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11610 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11611 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11612 ix86_add_queued_cfa_restore_notes (insn);
11614 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11615 if (m->fs.cfa_offset != UNITS_PER_WORD)
11617 m->fs.cfa_offset = UNITS_PER_WORD;
11618 add_reg_note (insn, REG_CFA_DEF_CFA,
11619 plus_constant (Pmode, stack_pointer_rtx,
11620 UNITS_PER_WORD));
11621 RTX_FRAME_RELATED_P (insn) = 1;
11624 m->fs.sp_offset = UNITS_PER_WORD;
11625 m->fs.sp_valid = true;
11628 else
11630 /* SEH requires that the function end with (1) a stack adjustment
11631 if necessary, (2) a sequence of pops, and (3) a return or
11632 jump instruction. Prevent insns from the function body from
11633 being scheduled into this sequence. */
11634 if (TARGET_SEH)
11636 /* Prevent a catch region from being adjacent to the standard
11637 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11638 several other flags that would be interesting to test are
11639 set up yet. */
11640 if (flag_non_call_exceptions)
11641 emit_insn (gen_nops (const1_rtx));
11642 else
11643 emit_insn (gen_blockage ());
11646 /* First step is to deallocate the stack frame so that we can
11647 pop the registers. Also do it on the SEH target for a very large
11648 frame, as the emitted instructions aren't allowed by the ABI in
11649 epilogues. */
11650 if (!m->fs.sp_valid
11651 || (TARGET_SEH
11652 && (m->fs.sp_offset - frame.reg_save_offset
11653 >= SEH_MAX_FRAME_SIZE)))
11655 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11656 GEN_INT (m->fs.fp_offset
11657 - frame.reg_save_offset),
11658 style, false);
11660 else if (m->fs.sp_offset != frame.reg_save_offset)
11662 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11663 GEN_INT (m->fs.sp_offset
11664 - frame.reg_save_offset),
11665 style,
11666 m->fs.cfa_reg == stack_pointer_rtx);
11669 ix86_emit_restore_regs_using_pop ();
11672 /* If we used a frame pointer and haven't already got rid of it,
11673 then do so now. */
11674 if (m->fs.fp_valid)
11676 /* If the stack pointer is valid and pointing at the frame
11677 pointer store address, then we only need a pop. */
11678 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11679 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11680 /* Leave results in shorter dependency chains on CPUs that are
11681 able to grok it fast. */
11682 else if (TARGET_USE_LEAVE
11683 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11684 || !cfun->machine->use_fast_prologue_epilogue)
11685 ix86_emit_leave ();
11686 else
11688 pro_epilogue_adjust_stack (stack_pointer_rtx,
11689 hard_frame_pointer_rtx,
11690 const0_rtx, style, !using_drap);
11691 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11695 if (using_drap)
11697 int param_ptr_offset = UNITS_PER_WORD;
11698 rtx insn;
11700 gcc_assert (stack_realign_drap);
11702 if (ix86_static_chain_on_stack)
11703 param_ptr_offset += UNITS_PER_WORD;
11704 if (!call_used_regs[REGNO (crtl->drap_reg)])
11705 param_ptr_offset += UNITS_PER_WORD;
11707 insn = emit_insn (gen_rtx_SET
11708 (VOIDmode, stack_pointer_rtx,
11709 gen_rtx_PLUS (Pmode,
11710 crtl->drap_reg,
11711 GEN_INT (-param_ptr_offset))));
11712 m->fs.cfa_reg = stack_pointer_rtx;
11713 m->fs.cfa_offset = param_ptr_offset;
11714 m->fs.sp_offset = param_ptr_offset;
11715 m->fs.realigned = false;
11717 add_reg_note (insn, REG_CFA_DEF_CFA,
11718 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11719 GEN_INT (param_ptr_offset)));
11720 RTX_FRAME_RELATED_P (insn) = 1;
11722 if (!call_used_regs[REGNO (crtl->drap_reg)])
11723 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11726 /* At this point the stack pointer must be valid, and we must have
11727 restored all of the registers. We may not have deallocated the
11728 entire stack frame. We've delayed this until now because it may
11729 be possible to merge the local stack deallocation with the
11730 deallocation forced by ix86_static_chain_on_stack. */
11731 gcc_assert (m->fs.sp_valid);
11732 gcc_assert (!m->fs.fp_valid);
11733 gcc_assert (!m->fs.realigned);
11734 if (m->fs.sp_offset != UNITS_PER_WORD)
11736 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11737 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11738 style, true);
11740 else
11741 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11743 /* Sibcall epilogues don't want a return instruction. */
11744 if (style == 0)
11746 m->fs = frame_state_save;
11747 return;
11750 if (crtl->args.pops_args && crtl->args.size)
11752 rtx popc = GEN_INT (crtl->args.pops_args);
11754 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11755 address, do an explicit add, and jump indirectly to the caller. */
11757 if (crtl->args.pops_args >= 65536)
11759 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11760 rtx insn;
11762 /* There is no "pascal" calling convention in any 64bit ABI. */
11763 gcc_assert (!TARGET_64BIT);
11765 insn = emit_insn (gen_pop (ecx));
11766 m->fs.cfa_offset -= UNITS_PER_WORD;
11767 m->fs.sp_offset -= UNITS_PER_WORD;
11769 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11770 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11771 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11772 add_reg_note (insn, REG_CFA_REGISTER,
11773 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11774 RTX_FRAME_RELATED_P (insn) = 1;
11776 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11777 popc, -1, true);
11778 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11780 else
11781 emit_jump_insn (gen_simple_return_pop_internal (popc));
11783 else
11784 emit_jump_insn (gen_simple_return_internal ());
11786 /* Restore the state back to the state from the prologue,
11787 so that it's correct for the next epilogue. */
11788 m->fs = frame_state_save;
11791 /* Reset from the function's potential modifications. */
11793 static void
11794 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11795 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11797 if (pic_offset_table_rtx)
11798 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11799 #if TARGET_MACHO
11800 /* Mach-O doesn't support labels at the end of objects, so if
11801 it looks like we might want one, insert a NOP. */
11803 rtx insn = get_last_insn ();
11804 rtx deleted_debug_label = NULL_RTX;
11805 while (insn
11806 && NOTE_P (insn)
11807 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11809 /* If we only found NOTE_INSN_DELETED_DEBUG_LABEL notes, don't
11810 insert a nop; instead set their CODE_LABEL_NUMBER to -1,
11811 otherwise there would be code generation differences
11812 between -g and -g0. */
11813 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11814 deleted_debug_label = insn;
11815 insn = PREV_INSN (insn);
11817 if (insn
11818 && (LABEL_P (insn)
11819 || (NOTE_P (insn)
11820 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11821 fputs ("\tnop\n", file);
11822 else if (deleted_debug_label)
11823 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11824 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11825 CODE_LABEL_NUMBER (insn) = -1;
11827 #endif
11831 /* Return a scratch register to use in the split stack prologue. The
11832 split stack prologue is used for -fsplit-stack. It is the first
11833 instructions in the function, even before the regular prologue.
11834 The scratch register can be any caller-saved register which is not
11835 used for parameters or for the static chain. */
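/* In 32-bit code the choice made below is, in summary (64-bit code
   simply uses %r11):
     fastcall          -> %eax (nested functions are rejected)
     thiscall          -> %edx, or %eax with a static chain
     0-2 reg params    -> %ecx, or %edx when a nested function uses at
                          most one register parameter
     3 reg params      -> rejected with a sorry ().  */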
11837 static unsigned int
11838 split_stack_prologue_scratch_regno (void)
11840 if (TARGET_64BIT)
11841 return R11_REG;
11842 else
11844 bool is_fastcall, is_thiscall;
11845 int regparm;
11847 is_fastcall = (lookup_attribute ("fastcall",
11848 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11849 != NULL);
11850 is_thiscall = (lookup_attribute ("thiscall",
11851 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11852 != NULL);
11853 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11855 if (is_fastcall)
11857 if (DECL_STATIC_CHAIN (cfun->decl))
11859 sorry ("-fsplit-stack does not support fastcall with "
11860 "nested function");
11861 return INVALID_REGNUM;
11863 return AX_REG;
11865 else if (is_thiscall)
11867 if (!DECL_STATIC_CHAIN (cfun->decl))
11868 return DX_REG;
11869 return AX_REG;
11871 else if (regparm < 3)
11873 if (!DECL_STATIC_CHAIN (cfun->decl))
11874 return CX_REG;
11875 else
11877 if (regparm >= 2)
11879 sorry ("-fsplit-stack does not support 2 register "
11880 "parameters for a nested function");
11881 return INVALID_REGNUM;
11883 return DX_REG;
11886 else
11888 /* FIXME: We could make this work by pushing a register
11889 around the addition and comparison. */
11890 sorry ("-fsplit-stack does not support 3 register parameters");
11891 return INVALID_REGNUM;
11896 /* A SYMBOL_REF for the function which allocates new stackspace for
11897 -fsplit-stack. */
11899 static GTY(()) rtx split_stack_fn;
11901 /* A SYMBOL_REF for the more stack function when using the large
11902 model. */
11904 static GTY(()) rtx split_stack_fn_large;
11906 /* Handle -fsplit-stack. These are the first instructions in the
11907 function, even before the regular prologue. */
11909 void
11910 ix86_expand_split_stack_prologue (void)
11912 struct ix86_frame frame;
11913 HOST_WIDE_INT allocate;
11914 unsigned HOST_WIDE_INT args_size;
11915 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11916 rtx scratch_reg = NULL_RTX;
11917 rtx varargs_label = NULL_RTX;
11918 rtx fn;
11920 gcc_assert (flag_split_stack && reload_completed);
11922 ix86_finalize_stack_realign_flags ();
11923 ix86_compute_frame_layout (&frame);
11924 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11926 /* This is the label we will branch to if we have enough stack
11927 space. We expect the basic block reordering pass to reverse this
11928 branch if optimizing, so that we branch in the unlikely case. */
11929 label = gen_label_rtx ();
11931 /* We need to compare the stack pointer minus the frame size with
11932 the stack boundary in the TCB. The stack boundary always gives
11933 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11934 can compare directly. Otherwise we need to do an addition. */
11936 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11937 UNSPEC_STACK_CHECK);
11938 limit = gen_rtx_CONST (Pmode, limit);
11939 limit = gen_rtx_MEM (Pmode, limit);
11940 if (allocate < SPLIT_STACK_AVAILABLE)
11941 current = stack_pointer_rtx;
11942 else
11944 unsigned int scratch_regno;
11945 rtx offset;
11947 /* We need a scratch register to hold the stack pointer minus
11948 the required frame size. Since this is the very start of the
11949 function, the scratch register can be any caller-saved
11950 register which is not used for parameters. */
11951 offset = GEN_INT (- allocate);
11952 scratch_regno = split_stack_prologue_scratch_regno ();
11953 if (scratch_regno == INVALID_REGNUM)
11954 return;
11955 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11956 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11958 /* We don't use ix86_gen_add3 in this case because it will
11959 want to split to lea, but when not optimizing the insn
11960 will not be split after this point. */
11961 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11962 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11963 offset)));
11965 else
11967 emit_move_insn (scratch_reg, offset);
11968 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11969 stack_pointer_rtx));
11971 current = scratch_reg;
11974 ix86_expand_branch (GEU, current, limit, label);
11975 jump_insn = get_last_insn ();
11976 JUMP_LABEL (jump_insn) = label;
11978 /* Mark the jump as very likely to be taken. */
11979 add_int_reg_note (jump_insn, REG_BR_PROB,
11980 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11982 if (split_stack_fn == NULL_RTX)
11983 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11984 fn = split_stack_fn;
11986 /* Get more stack space. We pass in the desired stack space and the
11987 size of the arguments to copy to the new stack. In 32-bit mode
11988 we push the parameters; __morestack will return on a new stack
11989 anyhow. In 64-bit mode we pass the parameters in r10 and
11990 r11. */
11991 allocate_rtx = GEN_INT (allocate);
11992 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11993 call_fusage = NULL_RTX;
11994 if (TARGET_64BIT)
11996 rtx reg10, reg11;
11998 reg10 = gen_rtx_REG (Pmode, R10_REG);
11999 reg11 = gen_rtx_REG (Pmode, R11_REG);
12001 /* If this function uses a static chain, it will be in %r10.
12002 Preserve it across the call to __morestack. */
12003 if (DECL_STATIC_CHAIN (cfun->decl))
12005 rtx rax;
12007 rax = gen_rtx_REG (word_mode, AX_REG);
12008 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12009 use_reg (&call_fusage, rax);
12012 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12013 && !TARGET_PECOFF)
12015 HOST_WIDE_INT argval;
12017 gcc_assert (Pmode == DImode);
12018 /* When using the large model we need to load the address
12019 into a register, and we've run out of registers. So we
12020 switch to a different calling convention, and we call a
12021 different function: __morestack_large. We pass the
12022 argument size in the upper 32 bits of r10 and pass the
12023 frame size in the lower 32 bits. */
12024 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12025 gcc_assert ((args_size & 0xffffffff) == args_size);
12027 if (split_stack_fn_large == NULL_RTX)
12028 split_stack_fn_large =
12029 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12031 if (ix86_cmodel == CM_LARGE_PIC)
12033 rtx label, x;
12035 label = gen_label_rtx ();
12036 emit_label (label);
12037 LABEL_PRESERVE_P (label) = 1;
12038 emit_insn (gen_set_rip_rex64 (reg10, label));
12039 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12040 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12041 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12042 UNSPEC_GOT);
12043 x = gen_rtx_CONST (Pmode, x);
12044 emit_move_insn (reg11, x);
12045 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12046 x = gen_const_mem (Pmode, x);
12047 emit_move_insn (reg11, x);
12049 else
12050 emit_move_insn (reg11, split_stack_fn_large);
12052 fn = reg11;
12054 argval = ((args_size << 16) << 16) + allocate;
12055 emit_move_insn (reg10, GEN_INT (argval));
12057 else
12059 emit_move_insn (reg10, allocate_rtx);
12060 emit_move_insn (reg11, GEN_INT (args_size));
12061 use_reg (&call_fusage, reg11);
12064 use_reg (&call_fusage, reg10);
12066 else
12068 emit_insn (gen_push (GEN_INT (args_size)));
12069 emit_insn (gen_push (allocate_rtx));
12071 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12072 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12073 NULL_RTX, false);
12074 add_function_usage_to (call_insn, call_fusage);
12076 /* In order to make call/return prediction work right, we now need
12077 to execute a return instruction. See
12078 libgcc/config/i386/morestack.S for the details on how this works.
12080 For flow purposes gcc must not see this as a return
12081 instruction--we need control flow to continue at the subsequent
12082 label. Therefore, we use an unspec. */
12083 gcc_assert (crtl->args.pops_args < 65536);
12084 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12086 /* If we are in 64-bit mode and this function uses a static chain,
12088 we saved %r10 in %rax before calling __morestack. */
12088 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12089 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12090 gen_rtx_REG (word_mode, AX_REG));
12092 /* If this function calls va_start, we need to store a pointer to
12093 the arguments on the old stack, because they may not have been
12094 all copied to the new stack. At this point the old stack can be
12095 found at the frame pointer value used by __morestack, because
12096 __morestack has set that up before calling back to us. Here we
12097 store that pointer in a scratch register, and in
12098 ix86_expand_prologue we store the scratch register in a stack
12099 slot. */
12100 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12102 unsigned int scratch_regno;
12103 rtx frame_reg;
12104 int words;
12106 scratch_regno = split_stack_prologue_scratch_regno ();
12107 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12108 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12110 /* 64-bit:
12111 fp -> old fp value
12112 return address within this function
12113 return address of caller of this function
12114 stack arguments
12115 So we add three words to get to the stack arguments.
12117 32-bit:
12118 fp -> old fp value
12119 return address within this function
12120 first argument to __morestack
12121 second argument to __morestack
12122 return address of caller of this function
12123 stack arguments
12124 So we add five words to get to the stack arguments. */
12126 words = TARGET_64BIT ? 3 : 5;
12127 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12128 gen_rtx_PLUS (Pmode, frame_reg,
12129 GEN_INT (words * UNITS_PER_WORD))));
12131 varargs_label = gen_label_rtx ();
12132 emit_jump_insn (gen_jump (varargs_label));
12133 JUMP_LABEL (get_last_insn ()) = varargs_label;
12135 emit_barrier ();
12138 emit_label (label);
12139 LABEL_NUSES (label) = 1;
12141 /* If this function calls va_start, we now have to set the scratch
12142 register for the case where we do not call __morestack. In this
12143 case we need to set it based on the stack pointer. */
12144 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12146 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12147 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12148 GEN_INT (UNITS_PER_WORD))));
12150 emit_label (varargs_label);
12151 LABEL_NUSES (varargs_label) = 1;
12155 /* We may have to tell the dataflow pass that the split stack prologue
12156 is initializing a scratch register. */
12158 static void
12159 ix86_live_on_entry (bitmap regs)
12161 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12163 gcc_assert (flag_split_stack);
12164 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12168 /* Extract the parts of an RTL expression that is a valid memory address
12169 for an instruction. Return 0 if the structure of the address is
12170 grossly off. Return -1 if the address contains ASHIFT, so it is not
12171 strictly valid, but is still used for computing the length of the lea instruction. */
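/* For example, the DImode address

     (plus:DI (plus:DI (mult:DI (reg:DI %rax) (const_int 4))
                       (reg:DI %rbx))
              (const_int 16))

   decomposes into base = %rbx, index = %rax, scale = 4 and disp = 16,
   i.e. the operand 16(%rbx,%rax,4).  (Register names are illustrative.) */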
12174 ix86_decompose_address (rtx addr, struct ix86_address *out)
12176 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12177 rtx base_reg, index_reg;
12178 HOST_WIDE_INT scale = 1;
12179 rtx scale_rtx = NULL_RTX;
12180 rtx tmp;
12181 int retval = 1;
12182 enum ix86_address_seg seg = SEG_DEFAULT;
12184 /* Allow zero-extended SImode addresses;
12185 they will be emitted with the addr32 prefix. */
12186 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12188 if (GET_CODE (addr) == ZERO_EXTEND
12189 && GET_MODE (XEXP (addr, 0)) == SImode)
12191 addr = XEXP (addr, 0);
12192 if (CONST_INT_P (addr))
12193 return 0;
12195 else if (GET_CODE (addr) == AND
12196 && const_32bit_mask (XEXP (addr, 1), DImode))
12198 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12199 if (addr == NULL_RTX)
12200 return 0;
12202 if (CONST_INT_P (addr))
12203 return 0;
12207 /* Allow SImode subregs of DImode addresses;
12208 they will be emitted with the addr32 prefix. */
12209 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12211 if (GET_CODE (addr) == SUBREG
12212 && GET_MODE (SUBREG_REG (addr)) == DImode)
12214 addr = SUBREG_REG (addr);
12215 if (CONST_INT_P (addr))
12216 return 0;
12220 if (REG_P (addr))
12221 base = addr;
12222 else if (GET_CODE (addr) == SUBREG)
12224 if (REG_P (SUBREG_REG (addr)))
12225 base = addr;
12226 else
12227 return 0;
12229 else if (GET_CODE (addr) == PLUS)
12231 rtx addends[4], op;
12232 int n = 0, i;
12234 op = addr;
12237 if (n >= 4)
12238 return 0;
12239 addends[n++] = XEXP (op, 1);
12240 op = XEXP (op, 0);
12242 while (GET_CODE (op) == PLUS);
12243 if (n >= 4)
12244 return 0;
12245 addends[n] = op;
12247 for (i = n; i >= 0; --i)
12249 op = addends[i];
12250 switch (GET_CODE (op))
12252 case MULT:
12253 if (index)
12254 return 0;
12255 index = XEXP (op, 0);
12256 scale_rtx = XEXP (op, 1);
12257 break;
12259 case ASHIFT:
12260 if (index)
12261 return 0;
12262 index = XEXP (op, 0);
12263 tmp = XEXP (op, 1);
12264 if (!CONST_INT_P (tmp))
12265 return 0;
12266 scale = INTVAL (tmp);
12267 if ((unsigned HOST_WIDE_INT) scale > 3)
12268 return 0;
12269 scale = 1 << scale;
12270 break;
12272 case ZERO_EXTEND:
12273 op = XEXP (op, 0);
12274 if (GET_CODE (op) != UNSPEC)
12275 return 0;
12276 /* FALLTHRU */
12278 case UNSPEC:
12279 if (XINT (op, 1) == UNSPEC_TP
12280 && TARGET_TLS_DIRECT_SEG_REFS
12281 && seg == SEG_DEFAULT)
12282 seg = DEFAULT_TLS_SEG_REG;
12283 else
12284 return 0;
12285 break;
12287 case SUBREG:
12288 if (!REG_P (SUBREG_REG (op)))
12289 return 0;
12290 /* FALLTHRU */
12292 case REG:
12293 if (!base)
12294 base = op;
12295 else if (!index)
12296 index = op;
12297 else
12298 return 0;
12299 break;
12301 case CONST:
12302 case CONST_INT:
12303 case SYMBOL_REF:
12304 case LABEL_REF:
12305 if (disp)
12306 return 0;
12307 disp = op;
12308 break;
12310 default:
12311 return 0;
12315 else if (GET_CODE (addr) == MULT)
12317 index = XEXP (addr, 0); /* index*scale */
12318 scale_rtx = XEXP (addr, 1);
12320 else if (GET_CODE (addr) == ASHIFT)
12322 /* We're called for lea too, which implements ashift on occasion. */
12323 index = XEXP (addr, 0);
12324 tmp = XEXP (addr, 1);
12325 if (!CONST_INT_P (tmp))
12326 return 0;
12327 scale = INTVAL (tmp);
12328 if ((unsigned HOST_WIDE_INT) scale > 3)
12329 return 0;
12330 scale = 1 << scale;
12331 retval = -1;
12333 else
12334 disp = addr; /* displacement */
12336 if (index)
12338 if (REG_P (index))
12340 else if (GET_CODE (index) == SUBREG
12341 && REG_P (SUBREG_REG (index)))
12343 else
12344 return 0;
12347 /* Extract the integral value of scale. */
12348 if (scale_rtx)
12350 if (!CONST_INT_P (scale_rtx))
12351 return 0;
12352 scale = INTVAL (scale_rtx);
12355 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12356 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12358 /* Avoid useless 0 displacement. */
12359 if (disp == const0_rtx && (base || index))
12360 disp = NULL_RTX;
12362 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
12363 if (base_reg && index_reg && scale == 1
12364 && (index_reg == arg_pointer_rtx
12365 || index_reg == frame_pointer_rtx
12366 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12368 rtx tmp;
12369 tmp = base, base = index, index = tmp;
12370 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12373 /* Special case: %ebp cannot be encoded as a base without a displacement.
12374 Similarly %r13. */
12375 if (!disp
12376 && base_reg
12377 && (base_reg == hard_frame_pointer_rtx
12378 || base_reg == frame_pointer_rtx
12379 || base_reg == arg_pointer_rtx
12380 || (REG_P (base_reg)
12381 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12382 || REGNO (base_reg) == R13_REG))))
12383 disp = const0_rtx;
12385 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12386 Avoid this by transforming to [%esi+0].
12387 Reload calls address legitimization without cfun defined, so we need
12388 to test cfun for being non-NULL. */
12389 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12390 && base_reg && !index_reg && !disp
12391 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12392 disp = const0_rtx;
12394 /* Special case: encode reg+reg instead of reg*2. */
12395 if (!base && index && scale == 2)
12396 base = index, base_reg = index_reg, scale = 1;
12398 /* Special case: scaling cannot be encoded without base or displacement. */
12399 if (!base && !disp && index && scale != 1)
12400 disp = const0_rtx;
12402 out->base = base;
12403 out->index = index;
12404 out->disp = disp;
12405 out->scale = scale;
12406 out->seg = seg;
12408 return retval;
12411 /* Return cost of the memory address x.
12412 For i386, it is better to use a complex address than let gcc copy
12413 the address into a reg and make a new pseudo. But not if the address
12414 requires two regs - that would mean more pseudos with longer
12415 lifetimes. */
12416 static int
12417 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12418 addr_space_t as ATTRIBUTE_UNUSED,
12419 bool speed ATTRIBUTE_UNUSED)
12421 struct ix86_address parts;
12422 int cost = 1;
12423 int ok = ix86_decompose_address (x, &parts);
12425 gcc_assert (ok);
12427 if (parts.base && GET_CODE (parts.base) == SUBREG)
12428 parts.base = SUBREG_REG (parts.base);
12429 if (parts.index && GET_CODE (parts.index) == SUBREG)
12430 parts.index = SUBREG_REG (parts.index);
12432 /* Attempt to minimize number of registers in the address. */
12433 if ((parts.base
12434 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12435 || (parts.index
12436 && (!REG_P (parts.index)
12437 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12438 cost++;
12440 if (parts.base
12441 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12442 && parts.index
12443 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12444 && parts.base != parts.index)
12445 cost++;
12447 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12448 since its predecode logic can't detect the length of such instructions
12449 and decoding degenerates to vector decoded. Increase the cost of such
12450 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12451 to split such addresses or even refuse such addresses at all.
12453 The following addressing modes are affected:
12454 [base+scale*index]
12455 [scale*index+disp]
12456 [base+index]
12458 The first and last case may be avoidable by explicitly coding the zero into
12459 the memory address, but I don't have an AMD-K6 machine handy to check this
12460 theory. */
12462 if (TARGET_K6
12463 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12464 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12465 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12466 cost += 10;
12468 return cost;
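/* To recap the heuristic above with concrete (illustrative) numbers: an
address using a single hard register, e.g. 4(%esp), keeps the base cost of 1;
an address built from two distinct pseudo registers costs 3 before register
allocation; and on K6 a [base+index] form with scale 1 and no displacement
additionally pays the +10 penalty.  */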
12471 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12472 this is used to form addresses to local data when -fPIC is in
12473 use. */
12475 static bool
12476 darwin_local_data_pic (rtx disp)
12478 return (GET_CODE (disp) == UNSPEC
12479 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12482 /* Determine if a given RTX is a valid constant. We already know this
12483 satisfies CONSTANT_P. */
12485 static bool
12486 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12488 switch (GET_CODE (x))
12490 case CONST:
12491 x = XEXP (x, 0);
12493 if (GET_CODE (x) == PLUS)
12495 if (!CONST_INT_P (XEXP (x, 1)))
12496 return false;
12497 x = XEXP (x, 0);
12500 if (TARGET_MACHO && darwin_local_data_pic (x))
12501 return true;
12503 /* Only some unspecs are valid as "constants". */
12504 if (GET_CODE (x) == UNSPEC)
12505 switch (XINT (x, 1))
12507 case UNSPEC_GOT:
12508 case UNSPEC_GOTOFF:
12509 case UNSPEC_PLTOFF:
12510 return TARGET_64BIT;
12511 case UNSPEC_TPOFF:
12512 case UNSPEC_NTPOFF:
12513 x = XVECEXP (x, 0, 0);
12514 return (GET_CODE (x) == SYMBOL_REF
12515 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12516 case UNSPEC_DTPOFF:
12517 x = XVECEXP (x, 0, 0);
12518 return (GET_CODE (x) == SYMBOL_REF
12519 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12520 default:
12521 return false;
12524 /* We must have drilled down to a symbol. */
12525 if (GET_CODE (x) == LABEL_REF)
12526 return true;
12527 if (GET_CODE (x) != SYMBOL_REF)
12528 return false;
12529 /* FALLTHRU */
12531 case SYMBOL_REF:
12532 /* TLS symbols are never valid. */
12533 if (SYMBOL_REF_TLS_MODEL (x))
12534 return false;
12536 /* DLLIMPORT symbols are never valid. */
12537 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12538 && SYMBOL_REF_DLLIMPORT_P (x))
12539 return false;
12541 #if TARGET_MACHO
12542 /* mdynamic-no-pic */
12543 if (MACHO_DYNAMIC_NO_PIC_P)
12544 return machopic_symbol_defined_p (x);
12545 #endif
12546 break;
12548 case CONST_DOUBLE:
12549 if (GET_MODE (x) == TImode
12550 && x != CONST0_RTX (TImode)
12551 && !TARGET_64BIT)
12552 return false;
12553 break;
12555 case CONST_VECTOR:
12556 if (!standard_sse_constant_p (x))
12557 return false;
12559 default:
12560 break;
12563 /* Otherwise we handle everything else in the move patterns. */
12564 return true;
12567 /* Determine if it's legal to put X into the constant pool. This
12568 is not possible for the address of thread-local symbols, which
12569 is checked above. */
12571 static bool
12572 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12574 /* We can always put integral constants and vectors in memory. */
12575 switch (GET_CODE (x))
12577 case CONST_INT:
12578 case CONST_DOUBLE:
12579 case CONST_VECTOR:
12580 return false;
12582 default:
12583 break;
12585 return !ix86_legitimate_constant_p (mode, x);
12588 /* Nonzero if the symbol is marked as dllimport, or as a stub variable,
12589 otherwise zero. */
12591 static bool
12592 is_imported_p (rtx x)
12594 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12595 || GET_CODE (x) != SYMBOL_REF)
12596 return false;
12598 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12602 /* Nonzero if the constant value X is a legitimate general operand
12603 when generating PIC code. It is given that flag_pic is on and
12604 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12606 bool
12607 legitimate_pic_operand_p (rtx x)
12609 rtx inner;
12611 switch (GET_CODE (x))
12613 case CONST:
12614 inner = XEXP (x, 0);
12615 if (GET_CODE (inner) == PLUS
12616 && CONST_INT_P (XEXP (inner, 1)))
12617 inner = XEXP (inner, 0);
12619 /* Only some unspecs are valid as "constants". */
12620 if (GET_CODE (inner) == UNSPEC)
12621 switch (XINT (inner, 1))
12623 case UNSPEC_GOT:
12624 case UNSPEC_GOTOFF:
12625 case UNSPEC_PLTOFF:
12626 return TARGET_64BIT;
12627 case UNSPEC_TPOFF:
12628 x = XVECEXP (inner, 0, 0);
12629 return (GET_CODE (x) == SYMBOL_REF
12630 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12631 case UNSPEC_MACHOPIC_OFFSET:
12632 return legitimate_pic_address_disp_p (x);
12633 default:
12634 return false;
12636 /* FALLTHRU */
12638 case SYMBOL_REF:
12639 case LABEL_REF:
12640 return legitimate_pic_address_disp_p (x);
12642 default:
12643 return true;
12647 /* Determine if a given CONST RTX is a valid memory displacement
12648 in PIC mode. */
12650 bool
12651 legitimate_pic_address_disp_p (rtx disp)
12653 bool saw_plus;
12655 /* In 64bit mode we can allow direct addresses of symbols and labels
12656 when they are not dynamic symbols. */
12657 if (TARGET_64BIT)
12659 rtx op0 = disp, op1;
12661 switch (GET_CODE (disp))
12663 case LABEL_REF:
12664 return true;
12666 case CONST:
12667 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12668 break;
12669 op0 = XEXP (XEXP (disp, 0), 0);
12670 op1 = XEXP (XEXP (disp, 0), 1);
12671 if (!CONST_INT_P (op1)
12672 || INTVAL (op1) >= 16*1024*1024
12673 || INTVAL (op1) < -16*1024*1024)
12674 break;
12675 if (GET_CODE (op0) == LABEL_REF)
12676 return true;
12677 if (GET_CODE (op0) == CONST
12678 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12679 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12680 return true;
12681 if (GET_CODE (op0) == UNSPEC
12682 && XINT (op0, 1) == UNSPEC_PCREL)
12683 return true;
12684 if (GET_CODE (op0) != SYMBOL_REF)
12685 break;
12686 /* FALLTHRU */
12688 case SYMBOL_REF:
12689 /* TLS references should always be enclosed in UNSPEC.
12690 The dllimported symbol always needs to be resolved. */
12691 if (SYMBOL_REF_TLS_MODEL (op0)
12692 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12693 return false;
12695 if (TARGET_PECOFF)
12697 if (is_imported_p (op0))
12698 return true;
12700 if (SYMBOL_REF_FAR_ADDR_P (op0)
12701 || !SYMBOL_REF_LOCAL_P (op0))
12702 break;
12704 /* Function symbols need to be resolved only for the
12705 large model.
12706 For the small model we don't need to resolve anything
12707 here. */
12708 if ((ix86_cmodel != CM_LARGE_PIC
12709 && SYMBOL_REF_FUNCTION_P (op0))
12710 || ix86_cmodel == CM_SMALL_PIC)
12711 return true;
12712 /* Non-external symbols don't need to be resolved for
12713 the large and medium models. */
12714 if ((ix86_cmodel == CM_LARGE_PIC
12715 || ix86_cmodel == CM_MEDIUM_PIC)
12716 && !SYMBOL_REF_EXTERNAL_P (op0))
12717 return true;
12719 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12720 && SYMBOL_REF_LOCAL_P (op0)
12721 && ix86_cmodel != CM_LARGE_PIC)
12722 return true;
12723 break;
12725 default:
12726 break;
12729 if (GET_CODE (disp) != CONST)
12730 return false;
12731 disp = XEXP (disp, 0);
12733 if (TARGET_64BIT)
12735 /* It is not safe to allow PLUS expressions; this limits the allowed distance
12736 of GOT references. We should not need these anyway. */
12737 if (GET_CODE (disp) != UNSPEC
12738 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12739 && XINT (disp, 1) != UNSPEC_GOTOFF
12740 && XINT (disp, 1) != UNSPEC_PCREL
12741 && XINT (disp, 1) != UNSPEC_PLTOFF))
12742 return false;
12744 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12745 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12746 return false;
12747 return true;
12750 saw_plus = false;
12751 if (GET_CODE (disp) == PLUS)
12753 if (!CONST_INT_P (XEXP (disp, 1)))
12754 return false;
12755 disp = XEXP (disp, 0);
12756 saw_plus = true;
12759 if (TARGET_MACHO && darwin_local_data_pic (disp))
12760 return true;
12762 if (GET_CODE (disp) != UNSPEC)
12763 return false;
12765 switch (XINT (disp, 1))
12767 case UNSPEC_GOT:
12768 if (saw_plus)
12769 return false;
12770 /* We need to check for both symbols and labels because VxWorks loads
12771 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12772 details. */
12773 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12774 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12775 case UNSPEC_GOTOFF:
12776 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12777 While the ABI also specifies a 32bit relocation, we don't produce it in
12778 the small PIC model at all. */
12779 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12780 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12781 && !TARGET_64BIT)
12782 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12783 return false;
12784 case UNSPEC_GOTTPOFF:
12785 case UNSPEC_GOTNTPOFF:
12786 case UNSPEC_INDNTPOFF:
12787 if (saw_plus)
12788 return false;
12789 disp = XVECEXP (disp, 0, 0);
12790 return (GET_CODE (disp) == SYMBOL_REF
12791 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12792 case UNSPEC_NTPOFF:
12793 disp = XVECEXP (disp, 0, 0);
12794 return (GET_CODE (disp) == SYMBOL_REF
12795 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12796 case UNSPEC_DTPOFF:
12797 disp = XVECEXP (disp, 0, 0);
12798 return (GET_CODE (disp) == SYMBOL_REF
12799 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12802 return false;
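/* Illustrative examples of the checks above: in 32-bit PIC code a
displacement such as (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)),
i.e. x@GOTOFF, is accepted (provided gotoff_operand accepts the symbol and
the target is not PE-COFF), while a bare (symbol_ref "x") is rejected and
must instead be reached through the GOT.  */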
12805 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12806 replace the input X, or the original X if no replacement is called for.
12807 The output parameter *WIN is 1 if the calling macro should goto WIN,
12808 0 if it should not. */
12810 bool
12811 ix86_legitimize_reload_address (rtx x,
12812 enum machine_mode mode ATTRIBUTE_UNUSED,
12813 int opnum, int type,
12814 int ind_levels ATTRIBUTE_UNUSED)
12816 /* Reload can generate:
12818 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12819 (reg:DI 97))
12820 (reg:DI 2 cx))
12822 This RTX is rejected by ix86_legitimate_address_p due to the
12823 non-strictness of base register 97. Following this rejection,
12824 reload pushes all three components into separate registers,
12825 creating an invalid memory address RTX.
12827 The following code reloads only the invalid part of the
12828 memory address RTX. */
12830 if (GET_CODE (x) == PLUS
12831 && REG_P (XEXP (x, 1))
12832 && GET_CODE (XEXP (x, 0)) == PLUS
12833 && REG_P (XEXP (XEXP (x, 0), 1)))
12835 rtx base, index;
12836 bool something_reloaded = false;
12838 base = XEXP (XEXP (x, 0), 1);
12839 if (!REG_OK_FOR_BASE_STRICT_P (base))
12841 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12842 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12843 opnum, (enum reload_type) type);
12844 something_reloaded = true;
12847 index = XEXP (x, 1);
12848 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12850 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12851 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12852 opnum, (enum reload_type) type);
12853 something_reloaded = true;
12856 gcc_assert (something_reloaded);
12857 return true;
12860 return false;
12863 /* Determine if OP is a suitable RTX for an address register.
12864 Return the naked register if a register or a register subreg is
12865 found, otherwise return NULL_RTX. */
12867 static rtx
12868 ix86_validate_address_register (rtx op)
12870 enum machine_mode mode = GET_MODE (op);
12872 /* Only SImode or DImode registers can form the address. */
12873 if (mode != SImode && mode != DImode)
12874 return NULL_RTX;
12876 if (REG_P (op))
12877 return op;
12878 else if (GET_CODE (op) == SUBREG)
12880 rtx reg = SUBREG_REG (op);
12882 if (!REG_P (reg))
12883 return NULL_RTX;
12885 mode = GET_MODE (reg);
12887 /* Don't allow SUBREGs that span more than a word. It can
12888 lead to spill failures when the register is one word out
12889 of a two word structure. */
12890 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12891 return NULL_RTX;
12893 /* Allow only SUBREGs of non-eliminable hard registers. */
12894 if (register_no_elim_operand (reg, mode))
12895 return reg;
12898 /* Op is not a register. */
12899 return NULL_RTX;
12902 /* Recognizes RTL expressions that are valid memory addresses for an
12903 instruction. The MODE argument is the machine mode for the MEM
12904 expression that wants to use this address.
12906 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12907 convert common non-canonical forms to canonical form so that they will
12908 be recognized. */
12910 static bool
12911 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12912 rtx addr, bool strict)
12914 struct ix86_address parts;
12915 rtx base, index, disp;
12916 HOST_WIDE_INT scale;
12917 enum ix86_address_seg seg;
12919 if (ix86_decompose_address (addr, &parts) <= 0)
12920 /* Decomposition failed. */
12921 return false;
12923 base = parts.base;
12924 index = parts.index;
12925 disp = parts.disp;
12926 scale = parts.scale;
12927 seg = parts.seg;
12929 /* Validate base register. */
12930 if (base)
12932 rtx reg = ix86_validate_address_register (base);
12934 if (reg == NULL_RTX)
12935 return false;
12937 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12938 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12939 /* Base is not valid. */
12940 return false;
12943 /* Validate index register. */
12944 if (index)
12946 rtx reg = ix86_validate_address_register (index);
12948 if (reg == NULL_RTX)
12949 return false;
12951 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12952 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12953 /* Index is not valid. */
12954 return false;
12957 /* Index and base should have the same mode. */
12958 if (base && index
12959 && GET_MODE (base) != GET_MODE (index))
12960 return false;
12962 /* Address override works only on the (%reg) part of %fs:(%reg). */
12963 if (seg != SEG_DEFAULT
12964 && ((base && GET_MODE (base) != word_mode)
12965 || (index && GET_MODE (index) != word_mode)))
12966 return false;
12968 /* Validate scale factor. */
12969 if (scale != 1)
12971 if (!index)
12972 /* Scale without index. */
12973 return false;
12975 if (scale != 2 && scale != 4 && scale != 8)
12976 /* Scale is not a valid multiplier. */
12977 return false;
12980 /* Validate displacement. */
12981 if (disp)
12983 if (GET_CODE (disp) == CONST
12984 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12985 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12986 switch (XINT (XEXP (disp, 0), 1))
12988 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12989 used. While the ABI also specifies 32bit relocations, we don't produce
12990 them at all and use IP-relative addressing instead. */
12991 case UNSPEC_GOT:
12992 case UNSPEC_GOTOFF:
12993 gcc_assert (flag_pic);
12994 if (!TARGET_64BIT)
12995 goto is_legitimate_pic;
12997 /* 64bit address unspec. */
12998 return false;
13000 case UNSPEC_GOTPCREL:
13001 case UNSPEC_PCREL:
13002 gcc_assert (flag_pic);
13003 goto is_legitimate_pic;
13005 case UNSPEC_GOTTPOFF:
13006 case UNSPEC_GOTNTPOFF:
13007 case UNSPEC_INDNTPOFF:
13008 case UNSPEC_NTPOFF:
13009 case UNSPEC_DTPOFF:
13010 break;
13012 case UNSPEC_STACK_CHECK:
13013 gcc_assert (flag_split_stack);
13014 break;
13016 default:
13017 /* Invalid address unspec. */
13018 return false;
13021 else if (SYMBOLIC_CONST (disp)
13022 && (flag_pic
13023 || (TARGET_MACHO
13024 #if TARGET_MACHO
13025 && MACHOPIC_INDIRECT
13026 && !machopic_operand_p (disp)
13027 #endif
13031 is_legitimate_pic:
13032 if (TARGET_64BIT && (index || base))
13034 /* foo@dtpoff(%rX) is ok. */
13035 if (GET_CODE (disp) != CONST
13036 || GET_CODE (XEXP (disp, 0)) != PLUS
13037 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13038 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13039 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13040 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13041 /* Non-constant pic memory reference. */
13042 return false;
13044 else if ((!TARGET_MACHO || flag_pic)
13045 && ! legitimate_pic_address_disp_p (disp))
13046 /* Displacement is an invalid pic construct. */
13047 return false;
13048 #if TARGET_MACHO
13049 else if (MACHO_DYNAMIC_NO_PIC_P
13050 && !ix86_legitimate_constant_p (Pmode, disp))
13051 /* Displacement must be referenced via non_lazy_pointer. */
13052 return false;
13053 #endif
13055 /* This code used to verify that a symbolic pic displacement
13056 includes the pic_offset_table_rtx register.
13058 While this is a good idea, unfortunately these constructs may
13059 be created by the "adds using lea" optimization for incorrect
13060 code like:
13062 int a;
13063 int foo(int i)
13065 return *(&a+i);
13068 This code is nonsensical, but results in addressing the
13069 GOT table with a pic_offset_table_rtx base. We can't
13070 just refuse it easily, since it gets matched by the
13071 "addsi3" pattern, which later gets split to lea when the
13072 output register differs from the input. While this
13073 could be handled by a separate addsi pattern for this case
13074 that never results in lea, disabling this test seems to be
13075 the easier and correct fix for the crash. */
13077 else if (GET_CODE (disp) != LABEL_REF
13078 && !CONST_INT_P (disp)
13079 && (GET_CODE (disp) != CONST
13080 || !ix86_legitimate_constant_p (Pmode, disp))
13081 && (GET_CODE (disp) != SYMBOL_REF
13082 || !ix86_legitimate_constant_p (Pmode, disp)))
13083 /* Displacement is not constant. */
13084 return false;
13085 else if (TARGET_64BIT
13086 && !x86_64_immediate_operand (disp, VOIDmode))
13087 /* Displacement is out of range. */
13088 return false;
13089 /* In x32 mode, constant addresses are sign extended to 64bit, so
13090 we have to reject addresses in the range 0x80000000 to 0xffffffff. */
13091 else if (TARGET_X32 && !(index || base)
13092 && CONST_INT_P (disp)
13093 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13094 return false;
13097 /* Everything looks valid. */
13098 return true;
13101 /* Determine if a given RTX is a valid constant address. */
13103 bool
13104 constant_address_p (rtx x)
13106 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13109 /* Return a unique alias set for the GOT. */
13111 static alias_set_type
13112 ix86_GOT_alias_set (void)
13114 static alias_set_type set = -1;
13115 if (set == -1)
13116 set = new_alias_set ();
13117 return set;
13120 /* Return a legitimate reference for ORIG (an address) using the
13121 register REG. If REG is 0, a new pseudo is generated.
13123 There are two types of references that must be handled:
13125 1. Global data references must load the address from the GOT, via
13126 the PIC reg. An insn is emitted to do this load, and the reg is
13127 returned.
13129 2. Static data references, constant pool addresses, and code labels
13130 compute the address as an offset from the GOT, whose base is in
13131 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13132 differentiate them from global data objects. The returned
13133 address is the PIC reg + an unspec constant.
13135 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13136 reg also appears in the address. */
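/* A 32-bit ELF illustration of the two cases (a sketch; the exact RTL
depends on target flags), where (reg) stands for pic_offset_table_rtx:
a global symbol "foo" becomes a load from the GOT,
(mem (plus (reg) (const (unspec [(symbol_ref "foo")] UNSPEC_GOT)))),
i.e. foo@GOT(%ebx), while local static data becomes the address
(plus (reg) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))),
i.e. foo@GOTOFF(%ebx), with no memory load.  */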
13138 static rtx
13139 legitimize_pic_address (rtx orig, rtx reg)
13141 rtx addr = orig;
13142 rtx new_rtx = orig;
13144 #if TARGET_MACHO
13145 if (TARGET_MACHO && !TARGET_64BIT)
13147 if (reg == 0)
13148 reg = gen_reg_rtx (Pmode);
13149 /* Use the generic Mach-O PIC machinery. */
13150 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13152 #endif
13154 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13156 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13157 if (tmp)
13158 return tmp;
13161 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13162 new_rtx = addr;
13163 else if (TARGET_64BIT && !TARGET_PECOFF
13164 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13166 rtx tmpreg;
13167 /* This symbol may be referenced via a displacement from the PIC
13168 base address (@GOTOFF). */
13170 if (reload_in_progress)
13171 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13172 if (GET_CODE (addr) == CONST)
13173 addr = XEXP (addr, 0);
13174 if (GET_CODE (addr) == PLUS)
13176 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13177 UNSPEC_GOTOFF);
13178 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13180 else
13181 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13182 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13183 if (!reg)
13184 tmpreg = gen_reg_rtx (Pmode);
13185 else
13186 tmpreg = reg;
13187 emit_move_insn (tmpreg, new_rtx);
13189 if (reg != 0)
13191 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13192 tmpreg, 1, OPTAB_DIRECT);
13193 new_rtx = reg;
13195 else
13196 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13198 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13200 /* This symbol may be referenced via a displacement from the PIC
13201 base address (@GOTOFF). */
13203 if (reload_in_progress)
13204 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13205 if (GET_CODE (addr) == CONST)
13206 addr = XEXP (addr, 0);
13207 if (GET_CODE (addr) == PLUS)
13209 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13210 UNSPEC_GOTOFF);
13211 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13213 else
13214 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13215 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13216 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13218 if (reg != 0)
13220 emit_move_insn (reg, new_rtx);
13221 new_rtx = reg;
13224 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13225 /* We can't use @GOTOFF for text labels on VxWorks;
13226 see gotoff_operand. */
13227 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13229 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13230 if (tmp)
13231 return tmp;
13233 /* For x64 PE-COFF there is no GOT table, so we use the address
13234 directly. */
13235 if (TARGET_64BIT && TARGET_PECOFF)
13237 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13238 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13240 if (reg == 0)
13241 reg = gen_reg_rtx (Pmode);
13242 emit_move_insn (reg, new_rtx);
13243 new_rtx = reg;
13245 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13247 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13248 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13249 new_rtx = gen_const_mem (Pmode, new_rtx);
13250 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13252 if (reg == 0)
13253 reg = gen_reg_rtx (Pmode);
13254 /* Use gen_movsi directly, otherwise the address is loaded
13255 into a register for CSE. We don't want to CSE these addresses;
13256 instead we CSE addresses from the GOT table, so skip this. */
13257 emit_insn (gen_movsi (reg, new_rtx));
13258 new_rtx = reg;
13260 else
13262 /* This symbol must be referenced via a load from the
13263 Global Offset Table (@GOT). */
13265 if (reload_in_progress)
13266 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13267 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13268 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13269 if (TARGET_64BIT)
13270 new_rtx = force_reg (Pmode, new_rtx);
13271 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13272 new_rtx = gen_const_mem (Pmode, new_rtx);
13273 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13275 if (reg == 0)
13276 reg = gen_reg_rtx (Pmode);
13277 emit_move_insn (reg, new_rtx);
13278 new_rtx = reg;
13281 else
13283 if (CONST_INT_P (addr)
13284 && !x86_64_immediate_operand (addr, VOIDmode))
13286 if (reg)
13288 emit_move_insn (reg, addr);
13289 new_rtx = reg;
13291 else
13292 new_rtx = force_reg (Pmode, addr);
13294 else if (GET_CODE (addr) == CONST)
13296 addr = XEXP (addr, 0);
13298 /* We must match stuff we generate before. Assume the only
13299 unspecs that can get here are ours. Not that we could do
13300 anything with them anyway.... */
13301 if (GET_CODE (addr) == UNSPEC
13302 || (GET_CODE (addr) == PLUS
13303 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13304 return orig;
13305 gcc_assert (GET_CODE (addr) == PLUS);
13307 if (GET_CODE (addr) == PLUS)
13309 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13311 /* Check first to see if this is a constant offset from a @GOTOFF
13312 symbol reference. */
13313 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13314 && CONST_INT_P (op1))
13316 if (!TARGET_64BIT)
13318 if (reload_in_progress)
13319 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13320 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13321 UNSPEC_GOTOFF);
13322 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13323 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13324 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13326 if (reg != 0)
13328 emit_move_insn (reg, new_rtx);
13329 new_rtx = reg;
13332 else
13334 if (INTVAL (op1) < -16*1024*1024
13335 || INTVAL (op1) >= 16*1024*1024)
13337 if (!x86_64_immediate_operand (op1, Pmode))
13338 op1 = force_reg (Pmode, op1);
13339 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13343 else
13345 rtx base = legitimize_pic_address (op0, reg);
13346 enum machine_mode mode = GET_MODE (base);
13347 new_rtx
13348 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13350 if (CONST_INT_P (new_rtx))
13352 if (INTVAL (new_rtx) < -16*1024*1024
13353 || INTVAL (new_rtx) >= 16*1024*1024)
13355 if (!x86_64_immediate_operand (new_rtx, mode))
13356 new_rtx = force_reg (mode, new_rtx);
13357 new_rtx
13358 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13360 else
13361 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13363 else
13365 if (GET_CODE (new_rtx) == PLUS
13366 && CONSTANT_P (XEXP (new_rtx, 1)))
13368 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13369 new_rtx = XEXP (new_rtx, 1);
13371 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13376 return new_rtx;
13379 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13381 static rtx
13382 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13384 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13386 if (GET_MODE (tp) != tp_mode)
13388 gcc_assert (GET_MODE (tp) == SImode);
13389 gcc_assert (tp_mode == DImode);
13391 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13394 if (to_reg)
13395 tp = copy_to_mode_reg (tp_mode, tp);
13397 return tp;
13400 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13402 static GTY(()) rtx ix86_tls_symbol;
13404 static rtx
13405 ix86_tls_get_addr (void)
13407 if (!ix86_tls_symbol)
13409 const char *sym
13410 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13411 ? "___tls_get_addr" : "__tls_get_addr");
13413 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13416 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13418 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13419 UNSPEC_PLTOFF);
13420 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13421 gen_rtx_CONST (Pmode, unspec));
13424 return ix86_tls_symbol;
13427 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13429 static GTY(()) rtx ix86_tls_module_base_symbol;
13432 ix86_tls_module_base (void)
13434 if (!ix86_tls_module_base_symbol)
13436 ix86_tls_module_base_symbol
13437 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13439 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13440 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13443 return ix86_tls_module_base_symbol;
13446 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13447 false if we expect this to be used for a memory address and true if
13448 we expect to load the address into a register. */
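/* As a rough illustration of the models handled below (the exact sequences
depend on target flags and the assembler): local-exec with GNU TLS reduces
to a %fs-relative reference such as %fs:x@tpoff, initial-exec loads
x@gottpoff(%rip) from the GOT and adds it to the thread pointer, and the
global-dynamic and local-dynamic models typically end in a call to
__tls_get_addr (unless GNU2 TLS descriptors are in use).  */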
13450 static rtx
13451 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13453 rtx dest, base, off;
13454 rtx pic = NULL_RTX, tp = NULL_RTX;
13455 enum machine_mode tp_mode = Pmode;
13456 int type;
13458 /* Fall back to the global dynamic model if the tool chain cannot support local
13459 dynamic. */
13460 if (TARGET_SUN_TLS && !TARGET_64BIT
13461 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13462 && model == TLS_MODEL_LOCAL_DYNAMIC)
13463 model = TLS_MODEL_GLOBAL_DYNAMIC;
13465 switch (model)
13467 case TLS_MODEL_GLOBAL_DYNAMIC:
13468 dest = gen_reg_rtx (Pmode);
13470 if (!TARGET_64BIT)
13472 if (flag_pic && !TARGET_PECOFF)
13473 pic = pic_offset_table_rtx;
13474 else
13476 pic = gen_reg_rtx (Pmode);
13477 emit_insn (gen_set_got (pic));
13481 if (TARGET_GNU2_TLS)
13483 if (TARGET_64BIT)
13484 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13485 else
13486 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13488 tp = get_thread_pointer (Pmode, true);
13489 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13491 if (GET_MODE (x) != Pmode)
13492 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13494 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13496 else
13498 rtx caddr = ix86_tls_get_addr ();
13500 if (TARGET_64BIT)
13502 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13503 rtx insns;
13505 start_sequence ();
13506 emit_call_insn
13507 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13508 insns = get_insns ();
13509 end_sequence ();
13511 if (GET_MODE (x) != Pmode)
13512 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13514 RTL_CONST_CALL_P (insns) = 1;
13515 emit_libcall_block (insns, dest, rax, x);
13517 else
13518 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13520 break;
13522 case TLS_MODEL_LOCAL_DYNAMIC:
13523 base = gen_reg_rtx (Pmode);
13525 if (!TARGET_64BIT)
13527 if (flag_pic)
13528 pic = pic_offset_table_rtx;
13529 else
13531 pic = gen_reg_rtx (Pmode);
13532 emit_insn (gen_set_got (pic));
13536 if (TARGET_GNU2_TLS)
13538 rtx tmp = ix86_tls_module_base ();
13540 if (TARGET_64BIT)
13541 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13542 else
13543 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13545 tp = get_thread_pointer (Pmode, true);
13546 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13547 gen_rtx_MINUS (Pmode, tmp, tp));
13549 else
13551 rtx caddr = ix86_tls_get_addr ();
13553 if (TARGET_64BIT)
13555 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13556 rtx insns, eqv;
13558 start_sequence ();
13559 emit_call_insn
13560 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13561 insns = get_insns ();
13562 end_sequence ();
13564 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13565 share the LD_BASE result with other LD model accesses. */
13566 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13567 UNSPEC_TLS_LD_BASE);
13569 RTL_CONST_CALL_P (insns) = 1;
13570 emit_libcall_block (insns, base, rax, eqv);
13572 else
13573 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13576 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13577 off = gen_rtx_CONST (Pmode, off);
13579 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13581 if (TARGET_GNU2_TLS)
13583 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13585 if (GET_MODE (x) != Pmode)
13586 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13588 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13590 break;
13592 case TLS_MODEL_INITIAL_EXEC:
13593 if (TARGET_64BIT)
13595 if (TARGET_SUN_TLS && !TARGET_X32)
13597 /* The Sun linker took the AMD64 TLS spec literally
13598 and can only handle %rax as the destination of the
13599 initial executable code sequence. */
13601 dest = gen_reg_rtx (DImode);
13602 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13603 return dest;
13606 /* Generate DImode references to avoid %fs:(%reg32)
13607 problems and the linker IE->LE relaxation bug. */
13608 tp_mode = DImode;
13609 pic = NULL;
13610 type = UNSPEC_GOTNTPOFF;
13612 else if (flag_pic)
13614 if (reload_in_progress)
13615 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13616 pic = pic_offset_table_rtx;
13617 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13619 else if (!TARGET_ANY_GNU_TLS)
13621 pic = gen_reg_rtx (Pmode);
13622 emit_insn (gen_set_got (pic));
13623 type = UNSPEC_GOTTPOFF;
13625 else
13627 pic = NULL;
13628 type = UNSPEC_INDNTPOFF;
13631 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13632 off = gen_rtx_CONST (tp_mode, off);
13633 if (pic)
13634 off = gen_rtx_PLUS (tp_mode, pic, off);
13635 off = gen_const_mem (tp_mode, off);
13636 set_mem_alias_set (off, ix86_GOT_alias_set ());
13638 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13640 base = get_thread_pointer (tp_mode,
13641 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13642 off = force_reg (tp_mode, off);
13643 return gen_rtx_PLUS (tp_mode, base, off);
13645 else
13647 base = get_thread_pointer (Pmode, true);
13648 dest = gen_reg_rtx (Pmode);
13649 emit_insn (ix86_gen_sub3 (dest, base, off));
13651 break;
13653 case TLS_MODEL_LOCAL_EXEC:
13654 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13655 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13656 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13657 off = gen_rtx_CONST (Pmode, off);
13659 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13661 base = get_thread_pointer (Pmode,
13662 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13663 return gen_rtx_PLUS (Pmode, base, off);
13665 else
13667 base = get_thread_pointer (Pmode, true);
13668 dest = gen_reg_rtx (Pmode);
13669 emit_insn (ix86_gen_sub3 (dest, base, off));
13671 break;
13673 default:
13674 gcc_unreachable ();
13677 return dest;
13680 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13681 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13682 unique refptr-DECL symbol corresponding to symbol DECL. */
13684 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13685 htab_t dllimport_map;
13687 static tree
13688 get_dllimport_decl (tree decl, bool beimport)
13690 struct tree_map *h, in;
13691 void **loc;
13692 const char *name;
13693 const char *prefix;
13694 size_t namelen, prefixlen;
13695 char *imp_name;
13696 tree to;
13697 rtx rtl;
13699 if (!dllimport_map)
13700 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13702 in.hash = htab_hash_pointer (decl);
13703 in.base.from = decl;
13704 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13705 h = (struct tree_map *) *loc;
13706 if (h)
13707 return h->to;
13709 *loc = h = ggc_alloc<tree_map> ();
13710 h->hash = in.hash;
13711 h->base.from = decl;
13712 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13713 VAR_DECL, NULL, ptr_type_node);
13714 DECL_ARTIFICIAL (to) = 1;
13715 DECL_IGNORED_P (to) = 1;
13716 DECL_EXTERNAL (to) = 1;
13717 TREE_READONLY (to) = 1;
13719 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13720 name = targetm.strip_name_encoding (name);
13721 if (beimport)
13722 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13723 ? "*__imp_" : "*__imp__";
13724 else
13725 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13726 namelen = strlen (name);
13727 prefixlen = strlen (prefix);
13728 imp_name = (char *) alloca (namelen + prefixlen + 1);
13729 memcpy (imp_name, prefix, prefixlen);
13730 memcpy (imp_name + prefixlen, name, namelen + 1);
13732 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13733 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13734 SET_SYMBOL_REF_DECL (rtl, to);
13735 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13736 if (!beimport)
13738 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13739 #ifdef SUB_TARGET_RECORD_STUB
13740 SUB_TARGET_RECORD_STUB (name);
13741 #endif
13744 rtl = gen_const_mem (Pmode, rtl);
13745 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13747 SET_DECL_RTL (to, rtl);
13748 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13750 return to;
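/* For illustration (hypothetical symbol name): with an empty
user_label_prefix, as on x86_64 mingw-w64, a dllimport reference to "bar"
is rewritten into the memory reference (mem (symbol_ref "*__imp_bar")),
while the refptr variant uses the name "*.refptr.bar".  */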
13753 /* Expand SYMBOL into its corresponding far-address symbol.
13754 WANT_REG is true if we require the result be a register. */
13756 static rtx
13757 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13759 tree imp_decl;
13760 rtx x;
13762 gcc_assert (SYMBOL_REF_DECL (symbol));
13763 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13765 x = DECL_RTL (imp_decl);
13766 if (want_reg)
13767 x = force_reg (Pmode, x);
13768 return x;
13771 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13772 true if we require the result be a register. */
13774 static rtx
13775 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13777 tree imp_decl;
13778 rtx x;
13780 gcc_assert (SYMBOL_REF_DECL (symbol));
13781 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13783 x = DECL_RTL (imp_decl);
13784 if (want_reg)
13785 x = force_reg (Pmode, x);
13786 return x;
13789 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13790 is true if we require the result be a register. */
13792 static rtx
13793 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13795 if (!TARGET_PECOFF)
13796 return NULL_RTX;
13798 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13800 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13801 return legitimize_dllimport_symbol (addr, inreg);
13802 if (GET_CODE (addr) == CONST
13803 && GET_CODE (XEXP (addr, 0)) == PLUS
13804 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13805 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13807 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13808 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13812 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13813 return NULL_RTX;
13814 if (GET_CODE (addr) == SYMBOL_REF
13815 && !is_imported_p (addr)
13816 && SYMBOL_REF_EXTERNAL_P (addr)
13817 && SYMBOL_REF_DECL (addr))
13818 return legitimize_pe_coff_extern_decl (addr, inreg);
13820 if (GET_CODE (addr) == CONST
13821 && GET_CODE (XEXP (addr, 0)) == PLUS
13822 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13823 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13824 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13825 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13827 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13828 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13830 return NULL_RTX;
13833 /* Try machine-dependent ways of modifying an illegitimate address
13834 to be legitimate. If we find one, return the new, valid address.
13835 This macro is used in only one place: `memory_address' in explow.c.
13837 OLDX is the address as it was before break_out_memory_refs was called.
13838 In some cases it is useful to look at this to decide what needs to be done.
13840 It is always safe for this macro to do nothing. It exists to recognize
13841 opportunities to optimize the output.
13843 For the 80386, we handle X+REG by loading X into a register R and
13844 using R+REG. R will go in a general reg and indexing will be used.
13845 However, if REG is a broken-out memory address or multiplication,
13846 nothing needs to be done because REG can certainly go in a general reg.
13848 When -fpic is used, special handling is needed for symbolic references.
13849 See comments by legitimize_pic_address in i386.c for details. */
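/* For example (a sketch): a non-canonical address such as
(plus (ashift (reg) (const_int 2)) (reg)) is rewritten below into
(plus (mult (reg) (const_int 4)) (reg)), matching the scaled-index
form that ix86_legitimate_address_p accepts.  */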
13851 static rtx
13852 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13853 enum machine_mode mode)
13855 int changed = 0;
13856 unsigned log;
13858 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13859 if (log)
13860 return legitimize_tls_address (x, (enum tls_model) log, false);
13861 if (GET_CODE (x) == CONST
13862 && GET_CODE (XEXP (x, 0)) == PLUS
13863 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13864 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13866 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13867 (enum tls_model) log, false);
13868 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13871 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13873 rtx tmp = legitimize_pe_coff_symbol (x, true);
13874 if (tmp)
13875 return tmp;
13878 if (flag_pic && SYMBOLIC_CONST (x))
13879 return legitimize_pic_address (x, 0);
13881 #if TARGET_MACHO
13882 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13883 return machopic_indirect_data_reference (x, 0);
13884 #endif
13886 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13887 if (GET_CODE (x) == ASHIFT
13888 && CONST_INT_P (XEXP (x, 1))
13889 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13891 changed = 1;
13892 log = INTVAL (XEXP (x, 1));
13893 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13894 GEN_INT (1 << log));
13897 if (GET_CODE (x) == PLUS)
13899 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13901 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13902 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13903 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13905 changed = 1;
13906 log = INTVAL (XEXP (XEXP (x, 0), 1));
13907 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13908 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13909 GEN_INT (1 << log));
13912 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13913 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13914 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13916 changed = 1;
13917 log = INTVAL (XEXP (XEXP (x, 1), 1));
13918 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13919 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13920 GEN_INT (1 << log));
13923 /* Put multiply first if it isn't already. */
13924 if (GET_CODE (XEXP (x, 1)) == MULT)
13926 rtx tmp = XEXP (x, 0);
13927 XEXP (x, 0) = XEXP (x, 1);
13928 XEXP (x, 1) = tmp;
13929 changed = 1;
13932 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13933 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13934 created by virtual register instantiation, register elimination, and
13935 similar optimizations. */
13936 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13938 changed = 1;
13939 x = gen_rtx_PLUS (Pmode,
13940 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13941 XEXP (XEXP (x, 1), 0)),
13942 XEXP (XEXP (x, 1), 1));
13945 /* Canonicalize
13946 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13947 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13948 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13949 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13950 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13951 && CONSTANT_P (XEXP (x, 1)))
13953 rtx constant;
13954 rtx other = NULL_RTX;
13956 if (CONST_INT_P (XEXP (x, 1)))
13958 constant = XEXP (x, 1);
13959 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13961 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13963 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13964 other = XEXP (x, 1);
13966 else
13967 constant = 0;
13969 if (constant)
13971 changed = 1;
13972 x = gen_rtx_PLUS (Pmode,
13973 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13974 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13975 plus_constant (Pmode, other,
13976 INTVAL (constant)));
13980 if (changed && ix86_legitimate_address_p (mode, x, false))
13981 return x;
13983 if (GET_CODE (XEXP (x, 0)) == MULT)
13985 changed = 1;
13986 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13989 if (GET_CODE (XEXP (x, 1)) == MULT)
13991 changed = 1;
13992 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13995 if (changed
13996 && REG_P (XEXP (x, 1))
13997 && REG_P (XEXP (x, 0)))
13998 return x;
14000 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14002 changed = 1;
14003 x = legitimize_pic_address (x, 0);
14006 if (changed && ix86_legitimate_address_p (mode, x, false))
14007 return x;
14009 if (REG_P (XEXP (x, 0)))
14011 rtx temp = gen_reg_rtx (Pmode);
14012 rtx val = force_operand (XEXP (x, 1), temp);
14013 if (val != temp)
14015 val = convert_to_mode (Pmode, val, 1);
14016 emit_move_insn (temp, val);
14019 XEXP (x, 1) = temp;
14020 return x;
14023 else if (REG_P (XEXP (x, 1)))
14025 rtx temp = gen_reg_rtx (Pmode);
14026 rtx val = force_operand (XEXP (x, 0), temp);
14027 if (val != temp)
14029 val = convert_to_mode (Pmode, val, 1);
14030 emit_move_insn (temp, val);
14033 XEXP (x, 0) = temp;
14034 return x;
14038 return x;
14041 /* Print an integer constant expression in assembler syntax. Addition
14042 and subtraction are the only arithmetic that may appear in these
14043 expressions. FILE is the stdio stream to write to, X is the rtx, and
14044 CODE is the operand print code from the output string. */
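/* Illustrative output: the expression
(const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) prints as "foo@GOTOFF",
and (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL)) prints as
"foo@GOTPCREL(%rip)" in AT&T syntax ("foo@GOTPCREL[rip]" in Intel syntax).  */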
14046 static void
14047 output_pic_addr_const (FILE *file, rtx x, int code)
14049 char buf[256];
14051 switch (GET_CODE (x))
14053 case PC:
14054 gcc_assert (flag_pic);
14055 putc ('.', file);
14056 break;
14058 case SYMBOL_REF:
14059 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14060 output_addr_const (file, x);
14061 else
14063 const char *name = XSTR (x, 0);
14065 /* Mark the decl as referenced so that cgraph will
14066 output the function. */
14067 if (SYMBOL_REF_DECL (x))
14068 mark_decl_referenced (SYMBOL_REF_DECL (x));
14070 #if TARGET_MACHO
14071 if (MACHOPIC_INDIRECT
14072 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14073 name = machopic_indirection_name (x, /*stub_p=*/true);
14074 #endif
14075 assemble_name (file, name);
14077 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14078 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14079 fputs ("@PLT", file);
14080 break;
14082 case LABEL_REF:
14083 x = XEXP (x, 0);
14084 /* FALLTHRU */
14085 case CODE_LABEL:
14086 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14087 assemble_name (asm_out_file, buf);
14088 break;
14090 case CONST_INT:
14091 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14092 break;
14094 case CONST:
14095 /* This used to output parentheses around the expression,
14096 but that does not work on the 386 (either ATT or BSD assembler). */
14097 output_pic_addr_const (file, XEXP (x, 0), code);
14098 break;
14100 case CONST_DOUBLE:
14101 if (GET_MODE (x) == VOIDmode)
14103 /* We can use %d if the number is <32 bits and positive. */
14104 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14105 fprintf (file, "0x%lx%08lx",
14106 (unsigned long) CONST_DOUBLE_HIGH (x),
14107 (unsigned long) CONST_DOUBLE_LOW (x));
14108 else
14109 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14111 else
14112 /* We can't handle floating point constants;
14113 TARGET_PRINT_OPERAND must handle them. */
14114 output_operand_lossage ("floating constant misused");
14115 break;
14117 case PLUS:
14118 /* Some assemblers need integer constants to appear first. */
14119 if (CONST_INT_P (XEXP (x, 0)))
14121 output_pic_addr_const (file, XEXP (x, 0), code);
14122 putc ('+', file);
14123 output_pic_addr_const (file, XEXP (x, 1), code);
14125 else
14127 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14128 output_pic_addr_const (file, XEXP (x, 1), code);
14129 putc ('+', file);
14130 output_pic_addr_const (file, XEXP (x, 0), code);
14132 break;
14134 case MINUS:
14135 if (!TARGET_MACHO)
14136 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14137 output_pic_addr_const (file, XEXP (x, 0), code);
14138 putc ('-', file);
14139 output_pic_addr_const (file, XEXP (x, 1), code);
14140 if (!TARGET_MACHO)
14141 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14142 break;
14144 case UNSPEC:
14145 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14147 bool f = i386_asm_output_addr_const_extra (file, x);
14148 gcc_assert (f);
14149 break;
14152 gcc_assert (XVECLEN (x, 0) == 1);
14153 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14154 switch (XINT (x, 1))
14156 case UNSPEC_GOT:
14157 fputs ("@GOT", file);
14158 break;
14159 case UNSPEC_GOTOFF:
14160 fputs ("@GOTOFF", file);
14161 break;
14162 case UNSPEC_PLTOFF:
14163 fputs ("@PLTOFF", file);
14164 break;
14165 case UNSPEC_PCREL:
14166 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14167 "(%rip)" : "[rip]", file);
14168 break;
14169 case UNSPEC_GOTPCREL:
14170 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14171 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14172 break;
14173 case UNSPEC_GOTTPOFF:
14174 /* FIXME: This might be @TPOFF in Sun ld too. */
14175 fputs ("@gottpoff", file);
14176 break;
14177 case UNSPEC_TPOFF:
14178 fputs ("@tpoff", file);
14179 break;
14180 case UNSPEC_NTPOFF:
14181 if (TARGET_64BIT)
14182 fputs ("@tpoff", file);
14183 else
14184 fputs ("@ntpoff", file);
14185 break;
14186 case UNSPEC_DTPOFF:
14187 fputs ("@dtpoff", file);
14188 break;
14189 case UNSPEC_GOTNTPOFF:
14190 if (TARGET_64BIT)
14191 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14192 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14193 else
14194 fputs ("@gotntpoff", file);
14195 break;
14196 case UNSPEC_INDNTPOFF:
14197 fputs ("@indntpoff", file);
14198 break;
14199 #if TARGET_MACHO
14200 case UNSPEC_MACHOPIC_OFFSET:
14201 putc ('-', file);
14202 machopic_output_function_base_name (file);
14203 break;
14204 #endif
14205 default:
14206 output_operand_lossage ("invalid UNSPEC as operand");
14207 break;
14209 break;
14211 default:
14212 output_operand_lossage ("invalid expression as operand");
14216 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14217 We need to emit DTP-relative relocations. */
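/* Illustrative output, assuming ASM_LONG expands to a ".long" directive:
for SIZE == 4 this emits ".long x@dtpoff"; for SIZE == 8 it emits
".long x@dtpoff, 0", leaving the upper 32 bits zero.  */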
14219 static void ATTRIBUTE_UNUSED
14220 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14222 fputs (ASM_LONG, file);
14223 output_addr_const (file, x);
14224 fputs ("@dtpoff", file);
14225 switch (size)
14227 case 4:
14228 break;
14229 case 8:
14230 fputs (", 0", file);
14231 break;
14232 default:
14233 gcc_unreachable ();
14237 /* Return true if X is a representation of the PIC register. This copes
14238 with calls from ix86_find_base_term, where the register might have
14239 been replaced by a cselib value. */
14241 static bool
14242 ix86_pic_register_p (rtx x)
14244 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14245 return (pic_offset_table_rtx
14246 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14247 else
14248 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14251 /* Helper function for ix86_delegitimize_address.
14252 Attempt to delegitimize TLS local-exec accesses. */
14254 static rtx
14255 ix86_delegitimize_tls_address (rtx orig_x)
14257 rtx x = orig_x, unspec;
14258 struct ix86_address addr;
14260 if (!TARGET_TLS_DIRECT_SEG_REFS)
14261 return orig_x;
14262 if (MEM_P (x))
14263 x = XEXP (x, 0);
14264 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14265 return orig_x;
14266 if (ix86_decompose_address (x, &addr) == 0
14267 || addr.seg != DEFAULT_TLS_SEG_REG
14268 || addr.disp == NULL_RTX
14269 || GET_CODE (addr.disp) != CONST)
14270 return orig_x;
14271 unspec = XEXP (addr.disp, 0);
14272 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14273 unspec = XEXP (unspec, 0);
14274 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14275 return orig_x;
14276 x = XVECEXP (unspec, 0, 0);
14277 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14278 if (unspec != XEXP (addr.disp, 0))
14279 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14280 if (addr.index)
14282 rtx idx = addr.index;
14283 if (addr.scale != 1)
14284 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14285 x = gen_rtx_PLUS (Pmode, idx, x);
14287 if (addr.base)
14288 x = gen_rtx_PLUS (Pmode, addr.base, x);
14289 if (MEM_P (orig_x))
14290 x = replace_equiv_address_nv (orig_x, x);
14291 return x;
14294 /* In the name of slightly smaller debug output, and to cater to
14295 general assembler lossage, recognize PIC+GOTOFF and turn it back
14296 into a direct symbol reference.
14298 On Darwin, this is necessary to avoid a crash, because Darwin
14299 has a different PIC label for each routine but the DWARF debugging
14300 information is not associated with any particular routine, so it's
14301 necessary to remove references to the PIC label from RTL stored by
14302 the DWARF output code. */
14304 static rtx
14305 ix86_delegitimize_address (rtx x)
14307 rtx orig_x = delegitimize_mem_from_attrs (x);
14308 /* addend is NULL or some rtx if x is something+GOTOFF where
14309 something doesn't include the PIC register. */
14310 rtx addend = NULL_RTX;
14311 /* reg_addend is NULL or a multiple of some register. */
14312 rtx reg_addend = NULL_RTX;
14313 /* const_addend is NULL or a const_int. */
14314 rtx const_addend = NULL_RTX;
14315 /* This is the result, or NULL. */
14316 rtx result = NULL_RTX;
14318 x = orig_x;
14320 if (MEM_P (x))
14321 x = XEXP (x, 0);
14323 if (TARGET_64BIT)
14325 if (GET_CODE (x) == CONST
14326 && GET_CODE (XEXP (x, 0)) == PLUS
14327 && GET_MODE (XEXP (x, 0)) == Pmode
14328 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14329 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14330 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14332 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14333 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14334 if (MEM_P (orig_x))
14335 x = replace_equiv_address_nv (orig_x, x);
14336 return x;
14339 if (GET_CODE (x) == CONST
14340 && GET_CODE (XEXP (x, 0)) == UNSPEC
14341 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14342 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14343 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14345 x = XVECEXP (XEXP (x, 0), 0, 0);
14346 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14348 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14349 GET_MODE (x), 0);
14350 if (x == NULL_RTX)
14351 return orig_x;
14353 return x;
14356 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14357 return ix86_delegitimize_tls_address (orig_x);
14359 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14360 and -mcmodel=medium -fpic. */
14363 if (GET_CODE (x) != PLUS
14364 || GET_CODE (XEXP (x, 1)) != CONST)
14365 return ix86_delegitimize_tls_address (orig_x);
14367 if (ix86_pic_register_p (XEXP (x, 0)))
14368 /* %ebx + GOT/GOTOFF */
14370 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14372 /* %ebx + %reg * scale + GOT/GOTOFF */
14373 reg_addend = XEXP (x, 0);
14374 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14375 reg_addend = XEXP (reg_addend, 1);
14376 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14377 reg_addend = XEXP (reg_addend, 0);
14378 else
14380 reg_addend = NULL_RTX;
14381 addend = XEXP (x, 0);
14384 else
14385 addend = XEXP (x, 0);
14387 x = XEXP (XEXP (x, 1), 0);
14388 if (GET_CODE (x) == PLUS
14389 && CONST_INT_P (XEXP (x, 1)))
14391 const_addend = XEXP (x, 1);
14392 x = XEXP (x, 0);
14395 if (GET_CODE (x) == UNSPEC
14396 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14397 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14398 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14399 && !MEM_P (orig_x) && !addend)))
14400 result = XVECEXP (x, 0, 0);
14402 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14403 && !MEM_P (orig_x))
14404 result = XVECEXP (x, 0, 0);
14406 if (! result)
14407 return ix86_delegitimize_tls_address (orig_x);
14409 if (const_addend)
14410 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14411 if (reg_addend)
14412 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14413 if (addend)
14415 /* If the rest of original X doesn't involve the PIC register, add
14416 addend and subtract pic_offset_table_rtx. This can happen e.g.
14417 for code like:
14418 leal (%ebx, %ecx, 4), %ecx
14420 movl foo@GOTOFF(%ecx), %edx
14421 in which case we return (%ecx - %ebx) + foo. */
14422 if (pic_offset_table_rtx)
14423 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14424 pic_offset_table_rtx),
14425 result);
14426 else
14427 return orig_x;
14429 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14431 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14432 if (result == NULL_RTX)
14433 return orig_x;
14435 return result;
14438 /* If X is a machine specific address (i.e. a symbol or label being
14439 referenced as a displacement from the GOT implemented using an
14440 UNSPEC), then return the base term. Otherwise return X. */
14443 ix86_find_base_term (rtx x)
14445 rtx term;
14447 if (TARGET_64BIT)
14449 if (GET_CODE (x) != CONST)
14450 return x;
14451 term = XEXP (x, 0);
14452 if (GET_CODE (term) == PLUS
14453 && (CONST_INT_P (XEXP (term, 1))
14454 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14455 term = XEXP (term, 0);
14456 if (GET_CODE (term) != UNSPEC
14457 || (XINT (term, 1) != UNSPEC_GOTPCREL
14458 && XINT (term, 1) != UNSPEC_PCREL))
14459 return x;
14461 return XVECEXP (term, 0, 0);
14464 return ix86_delegitimize_address (x);
14467 static void
14468 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14469 bool fp, FILE *file)
14471 const char *suffix;
14473 if (mode == CCFPmode || mode == CCFPUmode)
14475 code = ix86_fp_compare_code_to_integer (code);
14476 mode = CCmode;
14478 if (reverse)
14479 code = reverse_condition (code);
14481 switch (code)
14483 case EQ:
14484 switch (mode)
14486 case CCAmode:
14487 suffix = "a";
14488 break;
14490 case CCCmode:
14491 suffix = "c";
14492 break;
14494 case CCOmode:
14495 suffix = "o";
14496 break;
14498 case CCSmode:
14499 suffix = "s";
14500 break;
14502 default:
14503 suffix = "e";
14505 break;
14506 case NE:
14507 switch (mode)
14509 case CCAmode:
14510 suffix = "na";
14511 break;
14513 case CCCmode:
14514 suffix = "nc";
14515 break;
14517 case CCOmode:
14518 suffix = "no";
14519 break;
14521 case CCSmode:
14522 suffix = "ns";
14523 break;
14525 default:
14526 suffix = "ne";
14528 break;
14529 case GT:
14530 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14531 suffix = "g";
14532 break;
14533 case GTU:
14534 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14535 Those same assemblers have the same but opposite lossage on cmov. */
14536 if (mode == CCmode)
14537 suffix = fp ? "nbe" : "a";
14538 else
14539 gcc_unreachable ();
14540 break;
14541 case LT:
14542 switch (mode)
14544 case CCNOmode:
14545 case CCGOCmode:
14546 suffix = "s";
14547 break;
14549 case CCmode:
14550 case CCGCmode:
14551 suffix = "l";
14552 break;
14554 default:
14555 gcc_unreachable ();
14557 break;
14558 case LTU:
14559 if (mode == CCmode)
14560 suffix = "b";
14561 else if (mode == CCCmode)
14562 suffix = "c";
14563 else
14564 gcc_unreachable ();
14565 break;
14566 case GE:
14567 switch (mode)
14569 case CCNOmode:
14570 case CCGOCmode:
14571 suffix = "ns";
14572 break;
14574 case CCmode:
14575 case CCGCmode:
14576 suffix = "ge";
14577 break;
14579 default:
14580 gcc_unreachable ();
14582 break;
14583 case GEU:
14584 if (mode == CCmode)
14585 suffix = fp ? "nb" : "ae";
14586 else if (mode == CCCmode)
14587 suffix = "nc";
14588 else
14589 gcc_unreachable ();
14590 break;
14591 case LE:
14592 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14593 suffix = "le";
14594 break;
14595 case LEU:
14596 if (mode == CCmode)
14597 suffix = "be";
14598 else
14599 gcc_unreachable ();
14600 break;
14601 case UNORDERED:
14602 suffix = fp ? "u" : "p";
14603 break;
14604 case ORDERED:
14605 suffix = fp ? "nu" : "np";
14606 break;
14607 default:
14608 gcc_unreachable ();
14610 fputs (suffix, file);
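/* A worked example: a signed greater-than comparison in CCGCmode
   selects the "g" suffix, so the 'C' operand code below turns "set%C"
   or "cmov%C" style templates into "setg"/"cmovg", while the reversed
   form ('c') yields "setle"/"cmovle".  */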
14613 /* Print the name of register X to FILE based on its machine mode and number.
14614 If CODE is 'w', pretend the mode is HImode.
14615 If CODE is 'b', pretend the mode is QImode.
14616 If CODE is 'k', pretend the mode is SImode.
14617 If CODE is 'q', pretend the mode is DImode.
14618 If CODE is 'x', pretend the mode is V4SFmode.
14619 If CODE is 't', pretend the mode is V8SFmode.
14620 If CODE is 'g', pretend the mode is V16SFmode.
14621 If CODE is 'h', pretend the reg is the 'high' byte register.
14622 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14623 If CODE is 'd', duplicate the operand for an AVX instruction.
14626 void
14627 print_reg (rtx x, int code, FILE *file)
14629 const char *reg;
14630 unsigned int regno;
14631 bool duplicated = code == 'd' && TARGET_AVX;
14633 if (ASSEMBLER_DIALECT == ASM_ATT)
14634 putc ('%', file);
14636 if (x == pc_rtx)
14638 gcc_assert (TARGET_64BIT);
14639 fputs ("rip", file);
14640 return;
14643 regno = true_regnum (x);
14644 gcc_assert (regno != ARG_POINTER_REGNUM
14645 && regno != FRAME_POINTER_REGNUM
14646 && regno != FLAGS_REG
14647 && regno != FPSR_REG
14648 && regno != FPCR_REG);
14650 if (code == 'w' || MMX_REG_P (x))
14651 code = 2;
14652 else if (code == 'b')
14653 code = 1;
14654 else if (code == 'k')
14655 code = 4;
14656 else if (code == 'q')
14657 code = 8;
14658 else if (code == 'y')
14659 code = 3;
14660 else if (code == 'h')
14661 code = 0;
14662 else if (code == 'x')
14663 code = 16;
14664 else if (code == 't')
14665 code = 32;
14666 else if (code == 'g')
14667 code = 64;
14668 else
14669 code = GET_MODE_SIZE (GET_MODE (x));
14671 /* Irritatingly, AMD extended registers use a different naming convention
14672 from the normal registers: "r%d[bwd]".  */
14673 if (REX_INT_REGNO_P (regno))
14675 gcc_assert (TARGET_64BIT);
14676 putc ('r', file);
14677 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14678 switch (code)
14680 case 0:
14681 error ("extended registers have no high halves");
14682 break;
14683 case 1:
14684 putc ('b', file);
14685 break;
14686 case 2:
14687 putc ('w', file);
14688 break;
14689 case 4:
14690 putc ('d', file);
14691 break;
14692 case 8:
14693 /* no suffix */
14694 break;
14695 default:
14696 error ("unsupported operand size for extended register");
14697 break;
14699 return;
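/* For instance, the first extended register (regno == FIRST_REX_INT_REG)
   prints as "r8" with no suffix for an 8-byte access, "r8d" for 4 bytes,
   "r8w" for 2 bytes and "r8b" for a single byte, per the suffix switch
   above.  */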
14702 reg = NULL;
14703 switch (code)
14705 case 3:
14706 if (STACK_TOP_P (x))
14708 reg = "st(0)";
14709 break;
14711 /* FALLTHRU */
14712 case 8:
14713 case 4:
14714 case 12:
14715 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
14716 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14717 /* FALLTHRU */
14718 case 16:
14719 case 2:
14720 normal:
14721 reg = hi_reg_name[regno];
14722 break;
14723 case 1:
14724 if (regno >= ARRAY_SIZE (qi_reg_name))
14725 goto normal;
14726 reg = qi_reg_name[regno];
14727 break;
14728 case 0:
14729 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14730 goto normal;
14731 reg = qi_high_reg_name[regno];
14732 break;
14733 case 32:
14734 if (SSE_REG_P (x))
14736 gcc_assert (!duplicated);
14737 putc ('y', file);
14738 fputs (hi_reg_name[regno] + 1, file);
14739 return;
14741 case 64:
14742 if (SSE_REG_P (x))
14744 gcc_assert (!duplicated);
14745 putc ('z', file);
14746 fputs (hi_reg_name[REGNO (x)] + 1, file);
14747 return;
14749 break;
14750 default:
14751 gcc_unreachable ();
14754 fputs (reg, file);
14755 if (duplicated)
14757 if (ASSEMBLER_DIALECT == ASM_ATT)
14758 fprintf (file, ", %%%s", reg);
14759 else
14760 fprintf (file, ", %s", reg);
14764 /* Locate some local-dynamic symbol still in use by this function
14765 so that we can print its name in some tls_local_dynamic_base
14766 pattern. */
14768 static int
14769 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14771 rtx x = *px;
14773 if (GET_CODE (x) == SYMBOL_REF
14774 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14776 cfun->machine->some_ld_name = XSTR (x, 0);
14777 return 1;
14780 return 0;
14783 static const char *
14784 get_some_local_dynamic_name (void)
14786 rtx insn;
14788 if (cfun->machine->some_ld_name)
14789 return cfun->machine->some_ld_name;
14791 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14792 if (NONDEBUG_INSN_P (insn)
14793 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14794 return cfun->machine->some_ld_name;
14796 return NULL;
14799 /* Meaning of CODE:
14800 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14801 C -- print opcode suffix for set/cmov insn.
14802 c -- like C, but print reversed condition
14803 F,f -- likewise, but for floating-point.
14804 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14805 otherwise nothing
14806 R -- print embedded rounding and sae.
14807 r -- print only sae.
14808 z -- print the opcode suffix for the size of the current operand.
14809 Z -- likewise, with special suffixes for x87 instructions.
14810 * -- print a star (in certain assembler syntax)
14811 A -- print an absolute memory reference.
14812 E -- print address with DImode register names if TARGET_64BIT.
14813 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14814 s -- print a shift double count, followed by the assembler's argument
14815 delimiter.
14816 b -- print the QImode name of the register for the indicated operand.
14817 %b0 would print %al if operands[0] is reg 0.
14818 w -- likewise, print the HImode name of the register.
14819 k -- likewise, print the SImode name of the register.
14820 q -- likewise, print the DImode name of the register.
14821 x -- likewise, print the V4SFmode name of the register.
14822 t -- likewise, print the V8SFmode name of the register.
14823 g -- likewise, print the V16SFmode name of the register.
14824 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14825 y -- print "st(0)" instead of "st" as a register.
14826 d -- print duplicated register operand for AVX instruction.
14827 D -- print condition for SSE cmp instruction.
14828 P -- if PIC, print an @PLT suffix.
14829 p -- print raw symbol name.
14830 X -- don't print any sort of PIC '@' suffix for a symbol.
14831 & -- print some in-use local-dynamic symbol name.
14832 H -- print a memory address offset by 8; used for sse high-parts
14833 Y -- print condition for XOP pcom* instruction.
14834 + -- print a branch hint as 'cs' or 'ds' prefix
14835 ; -- print a semicolon (after prefixes, due to a bug in older gas).
14836 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14837 @ -- print a segment register of thread base pointer load
14838 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
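   For illustration: if operands[0] is a DImode general register, "%q0"
   prints its 64-bit name and "%k0" its 32-bit name; "%z0" on a 4-byte
   integer operand emits just the "l" suffix, so a template fragment such
   as "mov%z0" expands to "movl" in AT&T syntax (Intel syntax omits the
   size suffix entirely).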
14841 void
14842 ix86_print_operand (FILE *file, rtx x, int code)
14844 if (code)
14846 switch (code)
14848 case 'A':
14849 switch (ASSEMBLER_DIALECT)
14851 case ASM_ATT:
14852 putc ('*', file);
14853 break;
14855 case ASM_INTEL:
14856 /* Intel syntax. For absolute addresses, registers should not
14857 be surrounded by braces. */
14858 if (!REG_P (x))
14860 putc ('[', file);
14861 ix86_print_operand (file, x, 0);
14862 putc (']', file);
14863 return;
14865 break;
14867 default:
14868 gcc_unreachable ();
14871 ix86_print_operand (file, x, 0);
14872 return;
14874 case 'E':
14875 /* Wrap address in an UNSPEC to declare special handling. */
14876 if (TARGET_64BIT)
14877 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14879 output_address (x);
14880 return;
14882 case 'L':
14883 if (ASSEMBLER_DIALECT == ASM_ATT)
14884 putc ('l', file);
14885 return;
14887 case 'W':
14888 if (ASSEMBLER_DIALECT == ASM_ATT)
14889 putc ('w', file);
14890 return;
14892 case 'B':
14893 if (ASSEMBLER_DIALECT == ASM_ATT)
14894 putc ('b', file);
14895 return;
14897 case 'Q':
14898 if (ASSEMBLER_DIALECT == ASM_ATT)
14899 putc ('l', file);
14900 return;
14902 case 'S':
14903 if (ASSEMBLER_DIALECT == ASM_ATT)
14904 putc ('s', file);
14905 return;
14907 case 'T':
14908 if (ASSEMBLER_DIALECT == ASM_ATT)
14909 putc ('t', file);
14910 return;
14912 case 'O':
14913 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14914 if (ASSEMBLER_DIALECT != ASM_ATT)
14915 return;
14917 switch (GET_MODE_SIZE (GET_MODE (x)))
14919 case 2:
14920 putc ('w', file);
14921 break;
14923 case 4:
14924 putc ('l', file);
14925 break;
14927 case 8:
14928 putc ('q', file);
14929 break;
14931 default:
14932 output_operand_lossage
14933 ("invalid operand size for operand code 'O'");
14934 return;
14937 putc ('.', file);
14938 #endif
14939 return;
14941 case 'z':
14942 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14944 /* Opcodes don't get size suffixes if using Intel opcodes. */
14945 if (ASSEMBLER_DIALECT == ASM_INTEL)
14946 return;
14948 switch (GET_MODE_SIZE (GET_MODE (x)))
14950 case 1:
14951 putc ('b', file);
14952 return;
14954 case 2:
14955 putc ('w', file);
14956 return;
14958 case 4:
14959 putc ('l', file);
14960 return;
14962 case 8:
14963 putc ('q', file);
14964 return;
14966 default:
14967 output_operand_lossage
14968 ("invalid operand size for operand code 'z'");
14969 return;
14973 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14974 warning
14975 (0, "non-integer operand used with operand code 'z'");
14976 /* FALLTHRU */
14978 case 'Z':
14979 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14980 if (ASSEMBLER_DIALECT == ASM_INTEL)
14981 return;
14983 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14985 switch (GET_MODE_SIZE (GET_MODE (x)))
14987 case 2:
14988 #ifdef HAVE_AS_IX86_FILDS
14989 putc ('s', file);
14990 #endif
14991 return;
14993 case 4:
14994 putc ('l', file);
14995 return;
14997 case 8:
14998 #ifdef HAVE_AS_IX86_FILDQ
14999 putc ('q', file);
15000 #else
15001 fputs ("ll", file);
15002 #endif
15003 return;
15005 default:
15006 break;
15009 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15011 /* 387 opcodes don't get size suffixes
15012 if the operands are registers. */
15013 if (STACK_REG_P (x))
15014 return;
15016 switch (GET_MODE_SIZE (GET_MODE (x)))
15018 case 4:
15019 putc ('s', file);
15020 return;
15022 case 8:
15023 putc ('l', file);
15024 return;
15026 case 12:
15027 case 16:
15028 putc ('t', file);
15029 return;
15031 default:
15032 break;
15035 else
15037 output_operand_lossage
15038 ("invalid operand type used with operand code 'Z'");
15039 return;
15042 output_operand_lossage
15043 ("invalid operand size for operand code 'Z'");
15044 return;
15046 case 'd':
15047 case 'b':
15048 case 'w':
15049 case 'k':
15050 case 'q':
15051 case 'h':
15052 case 't':
15053 case 'g':
15054 case 'y':
15055 case 'x':
15056 case 'X':
15057 case 'P':
15058 case 'p':
15059 break;
15061 case 's':
15062 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15064 ix86_print_operand (file, x, 0);
15065 fputs (", ", file);
15067 return;
15069 case 'Y':
15070 switch (GET_CODE (x))
15072 case NE:
15073 fputs ("neq", file);
15074 break;
15075 case EQ:
15076 fputs ("eq", file);
15077 break;
15078 case GE:
15079 case GEU:
15080 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15081 break;
15082 case GT:
15083 case GTU:
15084 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15085 break;
15086 case LE:
15087 case LEU:
15088 fputs ("le", file);
15089 break;
15090 case LT:
15091 case LTU:
15092 fputs ("lt", file);
15093 break;
15094 case UNORDERED:
15095 fputs ("unord", file);
15096 break;
15097 case ORDERED:
15098 fputs ("ord", file);
15099 break;
15100 case UNEQ:
15101 fputs ("ueq", file);
15102 break;
15103 case UNGE:
15104 fputs ("nlt", file);
15105 break;
15106 case UNGT:
15107 fputs ("nle", file);
15108 break;
15109 case UNLE:
15110 fputs ("ule", file);
15111 break;
15112 case UNLT:
15113 fputs ("ult", file);
15114 break;
15115 case LTGT:
15116 fputs ("une", file);
15117 break;
15118 default:
15119 output_operand_lossage ("operand is not a condition code, "
15120 "invalid operand code 'Y'");
15121 return;
15123 return;
15125 case 'D':
15126 /* A little bit of braindamage here. The SSE compare instructions
15127 use completely different names for the comparisons than the
15128 fp conditional moves do. */
15129 switch (GET_CODE (x))
15131 case UNEQ:
15132 if (TARGET_AVX)
15134 fputs ("eq_us", file);
15135 break;
15137 case EQ:
15138 fputs ("eq", file);
15139 break;
15140 case UNLT:
15141 if (TARGET_AVX)
15143 fputs ("nge", file);
15144 break;
15146 case LT:
15147 fputs ("lt", file);
15148 break;
15149 case UNLE:
15150 if (TARGET_AVX)
15152 fputs ("ngt", file);
15153 break;
15155 case LE:
15156 fputs ("le", file);
15157 break;
15158 case UNORDERED:
15159 fputs ("unord", file);
15160 break;
15161 case LTGT:
15162 if (TARGET_AVX)
15164 fputs ("neq_oq", file);
15165 break;
15167 case NE:
15168 fputs ("neq", file);
15169 break;
15170 case GE:
15171 if (TARGET_AVX)
15173 fputs ("ge", file);
15174 break;
15176 case UNGE:
15177 fputs ("nlt", file);
15178 break;
15179 case GT:
15180 if (TARGET_AVX)
15182 fputs ("gt", file);
15183 break;
15185 case UNGT:
15186 fputs ("nle", file);
15187 break;
15188 case ORDERED:
15189 fputs ("ord", file);
15190 break;
15191 default:
15192 output_operand_lossage ("operand is not a condition code, "
15193 "invalid operand code 'D'");
15194 return;
15196 return;
15198 case 'F':
15199 case 'f':
15200 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15201 if (ASSEMBLER_DIALECT == ASM_ATT)
15202 putc ('.', file);
15203 #endif
15205 case 'C':
15206 case 'c':
15207 if (!COMPARISON_P (x))
15209 output_operand_lossage ("operand is not a condition code, "
15210 "invalid operand code '%c'", code);
15211 return;
15213 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15214 code == 'c' || code == 'f',
15215 code == 'F' || code == 'f',
15216 file);
15217 return;
15219 case 'H':
15220 if (!offsettable_memref_p (x))
15222 output_operand_lossage ("operand is not an offsettable memory "
15223 "reference, invalid operand code 'H'");
15224 return;
15226 /* It doesn't actually matter what mode we use here, as we're
15227 only going to use this for printing. */
15228 x = adjust_address_nv (x, DImode, 8);
15229 /* Output 'qword ptr' for intel assembler dialect. */
15230 if (ASSEMBLER_DIALECT == ASM_INTEL)
15231 code = 'q';
15232 break;
15234 case 'K':
15235 gcc_assert (CONST_INT_P (x));
15237 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15238 #ifdef HAVE_AS_IX86_HLE
15239 fputs ("xacquire ", file);
15240 #else
15241 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15242 #endif
15243 else if (INTVAL (x) & IX86_HLE_RELEASE)
15244 #ifdef HAVE_AS_IX86_HLE
15245 fputs ("xrelease ", file);
15246 #else
15247 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15248 #endif
15249 /* We do not want to print the value of the operand. */
15250 return;
15252 case 'N':
15253 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15254 fputs ("{z}", file);
15255 return;
15257 case 'r':
15258 gcc_assert (CONST_INT_P (x));
15259 gcc_assert (INTVAL (x) == ROUND_SAE);
15261 if (ASSEMBLER_DIALECT == ASM_INTEL)
15262 fputs (", ", file);
15264 fputs ("{sae}", file);
15266 if (ASSEMBLER_DIALECT == ASM_ATT)
15267 fputs (", ", file);
15269 return;
15271 case 'R':
15272 gcc_assert (CONST_INT_P (x));
15274 if (ASSEMBLER_DIALECT == ASM_INTEL)
15275 fputs (", ", file);
15277 switch (INTVAL (x))
15279 case ROUND_NEAREST_INT | ROUND_SAE:
15280 fputs ("{rn-sae}", file);
15281 break;
15282 case ROUND_NEG_INF | ROUND_SAE:
15283 fputs ("{rd-sae}", file);
15284 break;
15285 case ROUND_POS_INF | ROUND_SAE:
15286 fputs ("{ru-sae}", file);
15287 break;
15288 case ROUND_ZERO | ROUND_SAE:
15289 fputs ("{rz-sae}", file);
15290 break;
15291 default:
15292 gcc_unreachable ();
15295 if (ASSEMBLER_DIALECT == ASM_ATT)
15296 fputs (", ", file);
15298 return;
15300 case '*':
15301 if (ASSEMBLER_DIALECT == ASM_ATT)
15302 putc ('*', file);
15303 return;
15305 case '&':
15307 const char *name = get_some_local_dynamic_name ();
15308 if (name == NULL)
15309 output_operand_lossage ("'%%&' used without any "
15310 "local dynamic TLS references");
15311 else
15312 assemble_name (file, name);
15313 return;
15316 case '+':
15318 rtx x;
15320 if (!optimize
15321 || optimize_function_for_size_p (cfun)
15322 || !TARGET_BRANCH_PREDICTION_HINTS)
15323 return;
15325 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15326 if (x)
15328 int pred_val = XINT (x, 0);
15330 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15331 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15333 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15334 bool cputaken
15335 = final_forward_branch_p (current_output_insn) == 0;
15337 /* Emit hints only in the case where the default branch prediction
15338 heuristics would fail. */
15339 if (taken != cputaken)
15341 /* We use 3e (DS) prefix for taken branches and
15342 2e (CS) prefix for not taken branches. */
15343 if (taken)
15344 fputs ("ds ; ", file);
15345 else
15346 fputs ("cs ; ", file);
15350 return;
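  /* A note on the thresholds above: REG_BR_PROB_BASE is 10000 in GCC,
     so a hint is considered only when the recorded probability falls
     below 45% or above 55%; predictions close to 50/50 are left to the
     CPU's own predictor.  */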
15353 case ';':
15354 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15355 putc (';', file);
15356 #endif
15357 return;
15359 case '@':
15360 if (ASSEMBLER_DIALECT == ASM_ATT)
15361 putc ('%', file);
15363 /* The kernel uses a different segment register for performance
15364 reasons; this way a system call does not have to trash the userspace
15365 segment register, which would be expensive. */
15366 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15367 fputs ("fs", file);
15368 else
15369 fputs ("gs", file);
15370 return;
15372 case '~':
15373 putc (TARGET_AVX2 ? 'i' : 'f', file);
15374 return;
15376 case '^':
15377 if (TARGET_64BIT && Pmode != word_mode)
15378 fputs ("addr32 ", file);
15379 return;
15381 default:
15382 output_operand_lossage ("invalid operand code '%c'", code);
15386 if (REG_P (x))
15387 print_reg (x, code, file);
15389 else if (MEM_P (x))
15391 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15392 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15393 && GET_MODE (x) != BLKmode)
15395 const char * size;
15396 switch (GET_MODE_SIZE (GET_MODE (x)))
15398 case 1: size = "BYTE"; break;
15399 case 2: size = "WORD"; break;
15400 case 4: size = "DWORD"; break;
15401 case 8: size = "QWORD"; break;
15402 case 12: size = "TBYTE"; break;
15403 case 16:
15404 if (GET_MODE (x) == XFmode)
15405 size = "TBYTE";
15406 else
15407 size = "XMMWORD";
15408 break;
15409 case 32: size = "YMMWORD"; break;
15410 case 64: size = "ZMMWORD"; break;
15411 default:
15412 gcc_unreachable ();
15415 /* Check for explicit size override (codes 'b', 'w', 'k',
15416 'q' and 'x') */
15417 if (code == 'b')
15418 size = "BYTE";
15419 else if (code == 'w')
15420 size = "WORD";
15421 else if (code == 'k')
15422 size = "DWORD";
15423 else if (code == 'q')
15424 size = "QWORD";
15425 else if (code == 'x')
15426 size = "XMMWORD";
15428 fputs (size, file);
15429 fputs (" PTR ", file);
15432 x = XEXP (x, 0);
15433 /* Avoid (%rip) for call operands. */
15434 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15435 && !CONST_INT_P (x))
15436 output_addr_const (file, x);
15437 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15438 output_operand_lossage ("invalid constraints for operand");
15439 else
15440 output_address (x);
15443 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15445 REAL_VALUE_TYPE r;
15446 long l;
15448 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15449 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15451 if (ASSEMBLER_DIALECT == ASM_ATT)
15452 putc ('$', file);
15453 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15454 if (code == 'q')
15455 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15456 (unsigned long long) (int) l);
15457 else
15458 fprintf (file, "0x%08x", (unsigned int) l);
15461 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15463 REAL_VALUE_TYPE r;
15464 long l[2];
15466 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15467 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15469 if (ASSEMBLER_DIALECT == ASM_ATT)
15470 putc ('$', file);
15471 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15474 /* These float cases don't actually occur as immediate operands. */
15475 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15477 char dstr[30];
15479 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15480 fputs (dstr, file);
15483 else
15485 /* We have patterns that allow zero sets of memory, for instance.
15486 In 64-bit mode, we should probably support all 8-byte vectors,
15487 since we can in fact encode that into an immediate. */
15488 if (GET_CODE (x) == CONST_VECTOR)
15490 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15491 x = const0_rtx;
15494 if (code != 'P' && code != 'p')
15496 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15498 if (ASSEMBLER_DIALECT == ASM_ATT)
15499 putc ('$', file);
15501 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15502 || GET_CODE (x) == LABEL_REF)
15504 if (ASSEMBLER_DIALECT == ASM_ATT)
15505 putc ('$', file);
15506 else
15507 fputs ("OFFSET FLAT:", file);
15510 if (CONST_INT_P (x))
15511 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15512 else if (flag_pic || MACHOPIC_INDIRECT)
15513 output_pic_addr_const (file, x, code);
15514 else
15515 output_addr_const (file, x);
15519 static bool
15520 ix86_print_operand_punct_valid_p (unsigned char code)
15522 return (code == '@' || code == '*' || code == '+' || code == '&'
15523 || code == ';' || code == '~' || code == '^');
15526 /* Print a memory operand whose address is ADDR. */
15528 static void
15529 ix86_print_operand_address (FILE *file, rtx addr)
15531 struct ix86_address parts;
15532 rtx base, index, disp;
15533 int scale;
15534 int ok;
15535 bool vsib = false;
15536 int code = 0;
15538 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15540 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15541 gcc_assert (parts.index == NULL_RTX);
15542 parts.index = XVECEXP (addr, 0, 1);
15543 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15544 addr = XVECEXP (addr, 0, 0);
15545 vsib = true;
15547 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15549 gcc_assert (TARGET_64BIT);
15550 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15551 code = 'q';
15553 else
15554 ok = ix86_decompose_address (addr, &parts);
15556 gcc_assert (ok);
15558 base = parts.base;
15559 index = parts.index;
15560 disp = parts.disp;
15561 scale = parts.scale;
15563 switch (parts.seg)
15565 case SEG_DEFAULT:
15566 break;
15567 case SEG_FS:
15568 case SEG_GS:
15569 if (ASSEMBLER_DIALECT == ASM_ATT)
15570 putc ('%', file);
15571 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15572 break;
15573 default:
15574 gcc_unreachable ();
15577 /* Use the one-byte-shorter RIP-relative addressing for 64bit mode. */
15578 if (TARGET_64BIT && !base && !index)
15580 rtx symbol = disp;
15582 if (GET_CODE (disp) == CONST
15583 && GET_CODE (XEXP (disp, 0)) == PLUS
15584 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15585 symbol = XEXP (XEXP (disp, 0), 0);
15587 if (GET_CODE (symbol) == LABEL_REF
15588 || (GET_CODE (symbol) == SYMBOL_REF
15589 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15590 base = pc_rtx;
15592 if (!base && !index)
15594 /* Displacement only requires special attention. */
15596 if (CONST_INT_P (disp))
15598 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15599 fputs ("ds:", file);
15600 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15602 else if (flag_pic)
15603 output_pic_addr_const (file, disp, 0);
15604 else
15605 output_addr_const (file, disp);
15607 else
15609 /* Print SImode register names to force addr32 prefix. */
15610 if (SImode_address_operand (addr, VOIDmode))
15612 #ifdef ENABLE_CHECKING
15613 gcc_assert (TARGET_64BIT);
15614 switch (GET_CODE (addr))
15616 case SUBREG:
15617 gcc_assert (GET_MODE (addr) == SImode);
15618 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15619 break;
15620 case ZERO_EXTEND:
15621 case AND:
15622 gcc_assert (GET_MODE (addr) == DImode);
15623 break;
15624 default:
15625 gcc_unreachable ();
15627 #endif
15628 gcc_assert (!code);
15629 code = 'k';
15631 else if (code == 0
15632 && TARGET_X32
15633 && disp
15634 && CONST_INT_P (disp)
15635 && INTVAL (disp) < -16*1024*1024)
15637 /* X32 runs in 64-bit mode, where displacement, DISP, in
15638 address DISP(%r64), is encoded as 32-bit immediate sign-
15639 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15640 address is %r64 + 0xffffffffbffffd00. When %r64 <
15641 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15642 which is invalid for x32. The correct address is %r64
15643 - 0x40000300 == 0xf7ffdd64. To properly encode
15644 -0x40000300(%r64) for x32, we zero-extend negative
15645 displacement by forcing addr32 prefix which truncates
15646 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15647 zero-extend all negative displacements, including -1(%rsp).
15648 However, for small negative displacements, sign-extension
15649 won't cause overflow. We only zero-extend negative
14650 displacements if they are < -16*1024*1024, which is also used
15651 to check legitimate address displacements for PIC. */
15652 code = 'k';
15655 if (ASSEMBLER_DIALECT == ASM_ATT)
15657 if (disp)
15659 if (flag_pic)
15660 output_pic_addr_const (file, disp, 0);
15661 else if (GET_CODE (disp) == LABEL_REF)
15662 output_asm_label (disp);
15663 else
15664 output_addr_const (file, disp);
15667 putc ('(', file);
15668 if (base)
15669 print_reg (base, code, file);
15670 if (index)
15672 putc (',', file);
15673 print_reg (index, vsib ? 0 : code, file);
15674 if (scale != 1 || vsib)
15675 fprintf (file, ",%d", scale);
15677 putc (')', file);
15679 else
15681 rtx offset = NULL_RTX;
15683 if (disp)
15685 /* Pull out the offset of a symbol; print any symbol itself. */
15686 if (GET_CODE (disp) == CONST
15687 && GET_CODE (XEXP (disp, 0)) == PLUS
15688 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15690 offset = XEXP (XEXP (disp, 0), 1);
15691 disp = gen_rtx_CONST (VOIDmode,
15692 XEXP (XEXP (disp, 0), 0));
15695 if (flag_pic)
15696 output_pic_addr_const (file, disp, 0);
15697 else if (GET_CODE (disp) == LABEL_REF)
15698 output_asm_label (disp);
15699 else if (CONST_INT_P (disp))
15700 offset = disp;
15701 else
15702 output_addr_const (file, disp);
15705 putc ('[', file);
15706 if (base)
15708 print_reg (base, code, file);
15709 if (offset)
15711 if (INTVAL (offset) >= 0)
15712 putc ('+', file);
15713 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15716 else if (offset)
15717 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15718 else
15719 putc ('0', file);
15721 if (index)
15723 putc ('+', file);
15724 print_reg (index, vsib ? 0 : code, file);
15725 if (scale != 1 || vsib)
15726 fprintf (file, "*%d", scale);
15728 putc (']', file);
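/* For illustration: an address with base %rax, index %rbx, scale 4 and
   displacement 16 comes out as "16(%rax,%rbx,4)" in AT&T syntax and as
   "[rax+16+rbx*4]" in Intel syntax, per the two branches above.  */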
15733 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15735 static bool
15736 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15738 rtx op;
15740 if (GET_CODE (x) != UNSPEC)
15741 return false;
15743 op = XVECEXP (x, 0, 0);
15744 switch (XINT (x, 1))
15746 case UNSPEC_GOTTPOFF:
15747 output_addr_const (file, op);
15748 /* FIXME: This might be @TPOFF in Sun ld. */
15749 fputs ("@gottpoff", file);
15750 break;
15751 case UNSPEC_TPOFF:
15752 output_addr_const (file, op);
15753 fputs ("@tpoff", file);
15754 break;
15755 case UNSPEC_NTPOFF:
15756 output_addr_const (file, op);
15757 if (TARGET_64BIT)
15758 fputs ("@tpoff", file);
15759 else
15760 fputs ("@ntpoff", file);
15761 break;
15762 case UNSPEC_DTPOFF:
15763 output_addr_const (file, op);
15764 fputs ("@dtpoff", file);
15765 break;
15766 case UNSPEC_GOTNTPOFF:
15767 output_addr_const (file, op);
15768 if (TARGET_64BIT)
15769 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15770 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15771 else
15772 fputs ("@gotntpoff", file);
15773 break;
15774 case UNSPEC_INDNTPOFF:
15775 output_addr_const (file, op);
15776 fputs ("@indntpoff", file);
15777 break;
15778 #if TARGET_MACHO
15779 case UNSPEC_MACHOPIC_OFFSET:
15780 output_addr_const (file, op);
15781 putc ('-', file);
15782 machopic_output_function_base_name (file);
15783 break;
15784 #endif
15786 case UNSPEC_STACK_CHECK:
15788 int offset;
15790 gcc_assert (flag_split_stack);
15792 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15793 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15794 #else
15795 gcc_unreachable ();
15796 #endif
15798 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15800 break;
15802 default:
15803 return false;
15806 return true;
15809 /* Split one or more double-mode RTL references into pairs of half-mode
15810 references. The RTL can be REG, offsettable MEM, integer constant, or
15811 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15812 split and "num" is its length. lo_half and hi_half are output arrays
15813 that parallel "operands". */
15815 void
15816 split_double_mode (enum machine_mode mode, rtx operands[],
15817 int num, rtx lo_half[], rtx hi_half[])
15819 enum machine_mode half_mode;
15820 unsigned int byte;
15822 switch (mode)
15824 case TImode:
15825 half_mode = DImode;
15826 break;
15827 case DImode:
15828 half_mode = SImode;
15829 break;
15830 default:
15831 gcc_unreachable ();
15834 byte = GET_MODE_SIZE (half_mode);
15836 while (num--)
15838 rtx op = operands[num];
15840 /* simplify_subreg refuses to split volatile memory addresses,
15841 but we still have to handle them. */
15842 if (MEM_P (op))
15844 lo_half[num] = adjust_address (op, half_mode, 0);
15845 hi_half[num] = adjust_address (op, half_mode, byte);
15847 else
15849 lo_half[num] = simplify_gen_subreg (half_mode, op,
15850 GET_MODE (op) == VOIDmode
15851 ? mode : GET_MODE (op), 0);
15852 hi_half[num] = simplify_gen_subreg (half_mode, op,
15853 GET_MODE (op) == VOIDmode
15854 ? mode : GET_MODE (op), byte);
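/* For example, splitting a DImode memory operand on this little-endian
   target produces an SImode lo_half at the original address and an
   SImode hi_half at address + 4; register and constant operands are
   split with simplify_gen_subreg at the same byte offsets 0 and 4.  */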
15859 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15860 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15861 is the expression of the binary operation. The output may either be
15862 emitted here, or returned to the caller, like all output_* functions.
15864 There is no guarantee that the operands are the same mode, as they
15865 might be within FLOAT or FLOAT_EXTEND expressions. */
15867 #ifndef SYSV386_COMPAT
15868 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15869 wants to fix the assemblers because that causes incompatibility
15870 with gcc. No-one wants to fix gcc because that causes
15871 incompatibility with assemblers... You can use the option of
15872 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15873 #define SYSV386_COMPAT 1
15874 #endif
15876 const char *
15877 output_387_binary_op (rtx insn, rtx *operands)
15879 static char buf[40];
15880 const char *p;
15881 const char *ssep;
15882 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15884 #ifdef ENABLE_CHECKING
15885 /* Even if we do not want to check the inputs, this documents the input
15886 constraints, which helps in understanding the following code. */
15887 if (STACK_REG_P (operands[0])
15888 && ((REG_P (operands[1])
15889 && REGNO (operands[0]) == REGNO (operands[1])
15890 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15891 || (REG_P (operands[2])
15892 && REGNO (operands[0]) == REGNO (operands[2])
15893 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15894 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15895 ; /* ok */
15896 else
15897 gcc_assert (is_sse);
15898 #endif
15900 switch (GET_CODE (operands[3]))
15902 case PLUS:
15903 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15904 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15905 p = "fiadd";
15906 else
15907 p = "fadd";
15908 ssep = "vadd";
15909 break;
15911 case MINUS:
15912 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15913 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15914 p = "fisub";
15915 else
15916 p = "fsub";
15917 ssep = "vsub";
15918 break;
15920 case MULT:
15921 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15922 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15923 p = "fimul";
15924 else
15925 p = "fmul";
15926 ssep = "vmul";
15927 break;
15929 case DIV:
15930 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15931 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15932 p = "fidiv";
15933 else
15934 p = "fdiv";
15935 ssep = "vdiv";
15936 break;
15938 default:
15939 gcc_unreachable ();
15942 if (is_sse)
15944 if (TARGET_AVX)
15946 strcpy (buf, ssep);
15947 if (GET_MODE (operands[0]) == SFmode)
15948 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15949 else
15950 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15952 else
15954 strcpy (buf, ssep + 1);
15955 if (GET_MODE (operands[0]) == SFmode)
15956 strcat (buf, "ss\t{%2, %0|%0, %2}");
15957 else
15958 strcat (buf, "sd\t{%2, %0|%0, %2}");
15960 return buf;
15962 strcpy (buf, p);
15964 switch (GET_CODE (operands[3]))
15966 case MULT:
15967 case PLUS:
15968 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15970 rtx temp = operands[2];
15971 operands[2] = operands[1];
15972 operands[1] = temp;
15975 /* We know operands[0] == operands[1]. */
15977 if (MEM_P (operands[2]))
15979 p = "%Z2\t%2";
15980 break;
15983 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15985 if (STACK_TOP_P (operands[0]))
15986 /* How is it that we are storing to a dead operand[2]?
15987 Well, presumably operands[1] is dead too. We can't
15988 store the result to st(0) as st(0) gets popped on this
15989 instruction. Instead store to operands[2] (which I
15990 think has to be st(1)). st(1) will be popped later.
15991 gcc <= 2.8.1 didn't have this check and generated
15992 assembly code that the Unixware assembler rejected. */
15993 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15994 else
15995 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15996 break;
15999 if (STACK_TOP_P (operands[0]))
16000 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16001 else
16002 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16003 break;
16005 case MINUS:
16006 case DIV:
16007 if (MEM_P (operands[1]))
16009 p = "r%Z1\t%1";
16010 break;
16013 if (MEM_P (operands[2]))
16015 p = "%Z2\t%2";
16016 break;
16019 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16021 #if SYSV386_COMPAT
16022 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16023 derived assemblers, confusingly reverse the direction of
16024 the operation for fsub{r} and fdiv{r} when the
16025 destination register is not st(0). The Intel assembler
16026 doesn't have this brain damage. Read !SYSV386_COMPAT to
16027 figure out what the hardware really does. */
16028 if (STACK_TOP_P (operands[0]))
16029 p = "{p\t%0, %2|rp\t%2, %0}";
16030 else
16031 p = "{rp\t%2, %0|p\t%0, %2}";
16032 #else
16033 if (STACK_TOP_P (operands[0]))
16034 /* As above for fmul/fadd, we can't store to st(0). */
16035 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16036 else
16037 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16038 #endif
16039 break;
16042 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16044 #if SYSV386_COMPAT
16045 if (STACK_TOP_P (operands[0]))
16046 p = "{rp\t%0, %1|p\t%1, %0}";
16047 else
16048 p = "{p\t%1, %0|rp\t%0, %1}";
16049 #else
16050 if (STACK_TOP_P (operands[0]))
16051 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16052 else
16053 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16054 #endif
16055 break;
16058 if (STACK_TOP_P (operands[0]))
16060 if (STACK_TOP_P (operands[1]))
16061 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16062 else
16063 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16064 break;
16066 else if (STACK_TOP_P (operands[1]))
16068 #if SYSV386_COMPAT
16069 p = "{\t%1, %0|r\t%0, %1}";
16070 #else
16071 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16072 #endif
16074 else
16076 #if SYSV386_COMPAT
16077 p = "{r\t%2, %0|\t%0, %2}";
16078 #else
16079 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16080 #endif
16082 break;
16084 default:
16085 gcc_unreachable ();
16088 strcat (buf, p);
16089 return buf;
16092 /* Check if a 256bit AVX register is referenced inside of EXP. */
16094 static int
16095 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16097 rtx exp = *pexp;
16099 if (GET_CODE (exp) == SUBREG)
16100 exp = SUBREG_REG (exp);
16102 if (REG_P (exp)
16103 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16104 return 1;
16106 return 0;
16109 /* Return needed mode for entity in optimize_mode_switching pass. */
16111 static int
16112 ix86_avx_u128_mode_needed (rtx insn)
16114 if (CALL_P (insn))
16116 rtx link;
16118 /* Needed mode is set to AVX_U128_CLEAN if there are
16119 no 256bit modes used in function arguments. */
16120 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16121 link;
16122 link = XEXP (link, 1))
16124 if (GET_CODE (XEXP (link, 0)) == USE)
16126 rtx arg = XEXP (XEXP (link, 0), 0);
16128 if (ix86_check_avx256_register (&arg, NULL))
16129 return AVX_U128_DIRTY;
16133 return AVX_U128_CLEAN;
16136 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16137 changes state only when a 256bit register is written to, but we need
16138 to prevent the compiler from moving the optimal insertion point above
16139 an eventual read from a 256bit register. */
16140 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16141 return AVX_U128_DIRTY;
16143 return AVX_U128_ANY;
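/* Put differently: a call whose argument list includes a 256bit vector
   mode (e.g. an __m256 value passed in a ymm register) requires
   AVX_U128_DIRTY, a call with only scalar or 128bit arguments allows
   AVX_U128_CLEAN, and any other insn that references a 256bit register
   is DIRTY so the optimal vzeroupper insertion point is never hoisted
   above it.  */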
16146 /* Return mode that i387 must be switched into
16147 prior to the execution of insn. */
16149 static int
16150 ix86_i387_mode_needed (int entity, rtx insn)
16152 enum attr_i387_cw mode;
16154 /* The mode UNINITIALIZED is used to store the control word after a
16155 function call or ASM pattern. The mode ANY specifies that the function
16156 has no requirements on the control word and makes no changes in the
16157 bits we are interested in. */
16159 if (CALL_P (insn)
16160 || (NONJUMP_INSN_P (insn)
16161 && (asm_noperands (PATTERN (insn)) >= 0
16162 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16163 return I387_CW_UNINITIALIZED;
16165 if (recog_memoized (insn) < 0)
16166 return I387_CW_ANY;
16168 mode = get_attr_i387_cw (insn);
16170 switch (entity)
16172 case I387_TRUNC:
16173 if (mode == I387_CW_TRUNC)
16174 return mode;
16175 break;
16177 case I387_FLOOR:
16178 if (mode == I387_CW_FLOOR)
16179 return mode;
16180 break;
16182 case I387_CEIL:
16183 if (mode == I387_CW_CEIL)
16184 return mode;
16185 break;
16187 case I387_MASK_PM:
16188 if (mode == I387_CW_MASK_PM)
16189 return mode;
16190 break;
16192 default:
16193 gcc_unreachable ();
16196 return I387_CW_ANY;
16199 /* Return mode that entity must be switched into
16200 prior to the execution of insn. */
16202 static int
16203 ix86_mode_needed (int entity, rtx insn)
16205 switch (entity)
16207 case AVX_U128:
16208 return ix86_avx_u128_mode_needed (insn);
16209 case I387_TRUNC:
16210 case I387_FLOOR:
16211 case I387_CEIL:
16212 case I387_MASK_PM:
16213 return ix86_i387_mode_needed (entity, insn);
16214 default:
16215 gcc_unreachable ();
16217 return 0;
16220 /* Check if a 256bit AVX register is referenced in stores. */
16222 static void
16223 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16225 if (ix86_check_avx256_register (&dest, NULL))
16227 bool *used = (bool *) data;
16228 *used = true;
16232 /* Calculate mode of upper 128bit AVX registers after the insn. */
16234 static int
16235 ix86_avx_u128_mode_after (int mode, rtx insn)
16237 rtx pat = PATTERN (insn);
16239 if (vzeroupper_operation (pat, VOIDmode)
16240 || vzeroall_operation (pat, VOIDmode))
16241 return AVX_U128_CLEAN;
16243 /* We know that the state is clean after a CALL insn if there are no
16244 256bit registers used in the function return register. */
16245 if (CALL_P (insn))
16247 bool avx_reg256_found = false;
16248 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16250 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16253 /* Otherwise, return current mode. Remember that if insn
16254 references AVX 256bit registers, the mode was already changed
16255 to DIRTY from MODE_NEEDED. */
16256 return mode;
16259 /* Return the mode that an insn results in. */
16262 ix86_mode_after (int entity, int mode, rtx insn)
16264 switch (entity)
16266 case AVX_U128:
16267 return ix86_avx_u128_mode_after (mode, insn);
16268 case I387_TRUNC:
16269 case I387_FLOOR:
16270 case I387_CEIL:
16271 case I387_MASK_PM:
16272 return mode;
16273 default:
16274 gcc_unreachable ();
16278 static int
16279 ix86_avx_u128_mode_entry (void)
16281 tree arg;
16283 /* Entry mode is set to AVX_U128_DIRTY if there are
16284 256bit modes used in function arguments. */
16285 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16286 arg = TREE_CHAIN (arg))
16288 rtx incoming = DECL_INCOMING_RTL (arg);
16290 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16291 return AVX_U128_DIRTY;
16294 return AVX_U128_CLEAN;
16297 /* Return a mode that ENTITY is assumed to be
16298 switched to at function entry. */
16300 static int
16301 ix86_mode_entry (int entity)
16303 switch (entity)
16305 case AVX_U128:
16306 return ix86_avx_u128_mode_entry ();
16307 case I387_TRUNC:
16308 case I387_FLOOR:
16309 case I387_CEIL:
16310 case I387_MASK_PM:
16311 return I387_CW_ANY;
16312 default:
16313 gcc_unreachable ();
16317 static int
16318 ix86_avx_u128_mode_exit (void)
16320 rtx reg = crtl->return_rtx;
16322 /* Exit mode is set to AVX_U128_DIRTY if there are
16323 256bit modes used in the function return register. */
16324 if (reg && ix86_check_avx256_register (&reg, NULL))
16325 return AVX_U128_DIRTY;
16327 return AVX_U128_CLEAN;
16330 /* Return a mode that ENTITY is assumed to be
16331 switched to at function exit. */
16333 static int
16334 ix86_mode_exit (int entity)
16336 switch (entity)
16338 case AVX_U128:
16339 return ix86_avx_u128_mode_exit ();
16340 case I387_TRUNC:
16341 case I387_FLOOR:
16342 case I387_CEIL:
16343 case I387_MASK_PM:
16344 return I387_CW_ANY;
16345 default:
16346 gcc_unreachable ();
16350 static int
16351 ix86_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
16353 return n;
16356 /* Output code to initialize control word copies used by trunc?f?i and
16357 rounding patterns. CURRENT_MODE is set to current control word,
16358 while NEW_MODE is set to new control word. */
16360 static void
16361 emit_i387_cw_initialization (int mode)
16363 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16364 rtx new_mode;
16366 enum ix86_stack_slot slot;
16368 rtx reg = gen_reg_rtx (HImode);
16370 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16371 emit_move_insn (reg, copy_rtx (stored_mode));
16373 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16374 || optimize_insn_for_size_p ())
16376 switch (mode)
16378 case I387_CW_TRUNC:
16379 /* round toward zero (truncate) */
16380 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16381 slot = SLOT_CW_TRUNC;
16382 break;
16384 case I387_CW_FLOOR:
16385 /* round down toward -oo */
16386 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16387 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16388 slot = SLOT_CW_FLOOR;
16389 break;
16391 case I387_CW_CEIL:
16392 /* round up toward +oo */
16393 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16394 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16395 slot = SLOT_CW_CEIL;
16396 break;
16398 case I387_CW_MASK_PM:
16399 /* mask precision exception for nearbyint() */
16400 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16401 slot = SLOT_CW_MASK_PM;
16402 break;
16404 default:
16405 gcc_unreachable ();
16408 else
16410 switch (mode)
16412 case I387_CW_TRUNC:
16413 /* round toward zero (truncate) */
16414 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16415 slot = SLOT_CW_TRUNC;
16416 break;
16418 case I387_CW_FLOOR:
16419 /* round down toward -oo */
16420 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16421 slot = SLOT_CW_FLOOR;
16422 break;
16424 case I387_CW_CEIL:
16425 /* round up toward +oo */
16426 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16427 slot = SLOT_CW_CEIL;
16428 break;
16430 case I387_CW_MASK_PM:
16431 /* mask precision exception for nearbyint() */
16432 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16433 slot = SLOT_CW_MASK_PM;
16434 break;
16436 default:
16437 gcc_unreachable ();
16441 gcc_assert (slot < MAX_386_STACK_LOCALS);
16443 new_mode = assign_386_stack_local (HImode, slot);
16444 emit_move_insn (new_mode, reg);
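/* The constants used above follow the x87 control word layout: bits
   10-11 form the rounding-control field (0x0000 round to nearest,
   0x0400 round down, 0x0800 round up, 0x0c00 truncate) and bit 5
   (0x0020) is the precision-exception mask, which is why FLOOR and
   CEIL first clear 0x0c00 and then OR in their own rounding bits.  */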
16447 /* Emit vzeroupper. */
16449 void
16450 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16452 int i;
16454 /* Cancel automatic vzeroupper insertion if there are
16455 live call-saved SSE registers at the insertion point. */
16457 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16458 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16459 return;
16461 if (TARGET_64BIT)
16462 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16463 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16464 return;
16466 emit_insn (gen_avx_vzeroupper ());
16471 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
16472 is the set of hard registers live at the point where the insn(s)
16473 are to be inserted. */
16475 static void
16476 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16477 HARD_REG_SET regs_live)
16479 switch (entity)
16481 case AVX_U128:
16482 if (mode == AVX_U128_CLEAN)
16483 ix86_avx_emit_vzeroupper (regs_live);
16484 break;
16485 case I387_TRUNC:
16486 case I387_FLOOR:
16487 case I387_CEIL:
16488 case I387_MASK_PM:
16489 if (mode != I387_CW_ANY
16490 && mode != I387_CW_UNINITIALIZED)
16491 emit_i387_cw_initialization (mode);
16492 break;
16493 default:
16494 gcc_unreachable ();
16498 /* Output code for INSN to convert a float to a signed int. OPERANDS
16499 are the insn operands. The output may be [HSD]Imode and the input
16500 operand may be [SDX]Fmode. */
16502 const char *
16503 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16505 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16506 int dimode_p = GET_MODE (operands[0]) == DImode;
16507 int round_mode = get_attr_i387_cw (insn);
16509 /* Jump through a hoop or two for DImode, since the hardware has no
16510 non-popping instruction. We used to do this a different way, but
16511 that was somewhat fragile and broke with post-reload splitters. */
16512 if ((dimode_p || fisttp) && !stack_top_dies)
16513 output_asm_insn ("fld\t%y1", operands);
16515 gcc_assert (STACK_TOP_P (operands[1]));
16516 gcc_assert (MEM_P (operands[0]));
16517 gcc_assert (GET_MODE (operands[1]) != TFmode);
16519 if (fisttp)
16520 output_asm_insn ("fisttp%Z0\t%0", operands);
16521 else
16523 if (round_mode != I387_CW_ANY)
16524 output_asm_insn ("fldcw\t%3", operands);
16525 if (stack_top_dies || dimode_p)
16526 output_asm_insn ("fistp%Z0\t%0", operands);
16527 else
16528 output_asm_insn ("fist%Z0\t%0", operands);
16529 if (round_mode != I387_CW_ANY)
16530 output_asm_insn ("fldcw\t%2", operands);
16533 return "";
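/* An illustrative emitted sequence, assuming an SImode destination, a
   non-default rounding mode and a dying stack top: "fldcw %3" installs
   the truncating control word, "fistpl %0" stores and pops the value,
   and "fldcw %2" restores the caller's control word, matching the three
   output_asm_insn calls above.  */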
16536 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16537 have the values zero or one, indicates the ffreep insn's operand
16538 from the OPERANDS array. */
16540 static const char *
16541 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16543 if (TARGET_USE_FFREEP)
16544 #ifdef HAVE_AS_IX86_FFREEP
16545 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16546 #else
16548 static char retval[32];
16549 int regno = REGNO (operands[opno]);
16551 gcc_assert (STACK_REGNO_P (regno));
16553 regno -= FIRST_STACK_REG;
16555 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16556 return retval;
16558 #endif
16560 return opno ? "fstp\t%y1" : "fstp\t%y0";
16564 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16565 should be used. UNORDERED_P is true when fucom should be used. */
16567 const char *
16568 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16570 int stack_top_dies;
16571 rtx cmp_op0, cmp_op1;
16572 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16574 if (eflags_p)
16576 cmp_op0 = operands[0];
16577 cmp_op1 = operands[1];
16579 else
16581 cmp_op0 = operands[1];
16582 cmp_op1 = operands[2];
16585 if (is_sse)
16587 if (GET_MODE (operands[0]) == SFmode)
16588 if (unordered_p)
16589 return "%vucomiss\t{%1, %0|%0, %1}";
16590 else
16591 return "%vcomiss\t{%1, %0|%0, %1}";
16592 else
16593 if (unordered_p)
16594 return "%vucomisd\t{%1, %0|%0, %1}";
16595 else
16596 return "%vcomisd\t{%1, %0|%0, %1}";
16599 gcc_assert (STACK_TOP_P (cmp_op0));
16601 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16603 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16605 if (stack_top_dies)
16607 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16608 return output_387_ffreep (operands, 1);
16610 else
16611 return "ftst\n\tfnstsw\t%0";
16614 if (STACK_REG_P (cmp_op1)
16615 && stack_top_dies
16616 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16617 && REGNO (cmp_op1) != FIRST_STACK_REG)
16619 /* If the top of the 387 stack dies, and the other operand is also
16620 a stack register that dies, then this must be a
16621 `fcompp' float compare. */
16623 if (eflags_p)
16625 /* There is no double popping fcomi variant. Fortunately,
16626 eflags is immune from the fstp's cc clobbering. */
16627 if (unordered_p)
16628 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16629 else
16630 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16631 return output_387_ffreep (operands, 0);
16633 else
16635 if (unordered_p)
16636 return "fucompp\n\tfnstsw\t%0";
16637 else
16638 return "fcompp\n\tfnstsw\t%0";
16641 else
16643 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16645 static const char * const alt[16] =
16647 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16648 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16649 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16650 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16652 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16653 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16654 NULL,
16655 NULL,
16657 "fcomi\t{%y1, %0|%0, %y1}",
16658 "fcomip\t{%y1, %0|%0, %y1}",
16659 "fucomi\t{%y1, %0|%0, %y1}",
16660 "fucomip\t{%y1, %0|%0, %y1}",
16662 NULL,
16663 NULL,
16664 NULL,
16665 NULL
16668 int mask;
16669 const char *ret;
16671 mask = eflags_p << 3;
16672 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16673 mask |= unordered_p << 1;
16674 mask |= stack_top_dies;
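/* Worked example (illustrative): for an EFLAGS-using compare
   (eflags_p = 1) where cmp_op1 is a floating-point stack register
   (not MODE_INT), the compare is unordered and the stack top dies,
   the mask is (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */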
16676 gcc_assert (mask < 16);
16677 ret = alt[mask];
16678 gcc_assert (ret);
16680 return ret;
16684 void
16685 ix86_output_addr_vec_elt (FILE *file, int value)
16687 const char *directive = ASM_LONG;
16689 #ifdef ASM_QUAD
16690 if (TARGET_LP64)
16691 directive = ASM_QUAD;
16692 #else
16693 gcc_assert (!TARGET_64BIT);
16694 #endif
16696 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16699 void
16700 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16702 const char *directive = ASM_LONG;
16704 #ifdef ASM_QUAD
16705 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16706 directive = ASM_QUAD;
16707 #else
16708 gcc_assert (!TARGET_64BIT);
16709 #endif
16710 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16711 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16712 fprintf (file, "%s%s%d-%s%d\n",
16713 directive, LPREFIX, value, LPREFIX, rel);
16714 else if (HAVE_AS_GOTOFF_IN_DATA)
16715 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16716 #if TARGET_MACHO
16717 else if (TARGET_MACHO)
16719 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16720 machopic_output_function_base_name (file);
16721 putc ('\n', file);
16723 #endif
16724 else
16725 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16726 GOT_SYMBOL_NAME, LPREFIX, value);
16729 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16730 for the target. */
16732 void
16733 ix86_expand_clear (rtx dest)
16735 rtx tmp;
16737 /* We play register width games, which are only valid after reload. */
16738 gcc_assert (reload_completed);
16740 /* Avoid HImode and its attendant prefix byte. */
16741 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16742 dest = gen_rtx_REG (SImode, REGNO (dest));
16743 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16745 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16747 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16748 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16751 emit_insn (tmp);
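/* For illustration (assumed typical output; the final choice is made by
   the move patterns): clearing %eax expands to "xor %eax, %eax" with a
   FLAGS_REG clobber when !TARGET_USE_MOV0 or when optimizing for size,
   and to "mov $0, %eax" with no flags clobber otherwise.  */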
16754 /* X is an unchanging MEM. If it is a constant pool reference, return
16755 the constant pool rtx, else NULL. */
16758 maybe_get_pool_constant (rtx x)
16760 x = ix86_delegitimize_address (XEXP (x, 0));
16762 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16763 return get_pool_constant (x);
16765 return NULL_RTX;
16768 void
16769 ix86_expand_move (enum machine_mode mode, rtx operands[])
16771 rtx op0, op1;
16772 enum tls_model model;
16774 op0 = operands[0];
16775 op1 = operands[1];
16777 if (GET_CODE (op1) == SYMBOL_REF)
16779 rtx tmp;
16781 model = SYMBOL_REF_TLS_MODEL (op1);
16782 if (model)
16784 op1 = legitimize_tls_address (op1, model, true);
16785 op1 = force_operand (op1, op0);
16786 if (op1 == op0)
16787 return;
16788 op1 = convert_to_mode (mode, op1, 1);
16790 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16791 op1 = tmp;
16793 else if (GET_CODE (op1) == CONST
16794 && GET_CODE (XEXP (op1, 0)) == PLUS
16795 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16797 rtx addend = XEXP (XEXP (op1, 0), 1);
16798 rtx symbol = XEXP (XEXP (op1, 0), 0);
16799 rtx tmp;
16801 model = SYMBOL_REF_TLS_MODEL (symbol);
16802 if (model)
16803 tmp = legitimize_tls_address (symbol, model, true);
16804 else
16805 tmp = legitimize_pe_coff_symbol (symbol, true);
16807 if (tmp)
16809 tmp = force_operand (tmp, NULL);
16810 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16811 op0, 1, OPTAB_DIRECT);
16812 if (tmp == op0)
16813 return;
16814 op1 = convert_to_mode (mode, tmp, 1);
16818 if ((flag_pic || MACHOPIC_INDIRECT)
16819 && symbolic_operand (op1, mode))
16821 if (TARGET_MACHO && !TARGET_64BIT)
16823 #if TARGET_MACHO
16824 /* dynamic-no-pic */
16825 if (MACHOPIC_INDIRECT)
16827 rtx temp = ((reload_in_progress
16828 || ((op0 && REG_P (op0))
16829 && mode == Pmode))
16830 ? op0 : gen_reg_rtx (Pmode));
16831 op1 = machopic_indirect_data_reference (op1, temp);
16832 if (MACHOPIC_PURE)
16833 op1 = machopic_legitimize_pic_address (op1, mode,
16834 temp == op1 ? 0 : temp);
16836 if (op0 != op1 && GET_CODE (op0) != MEM)
16838 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16839 emit_insn (insn);
16840 return;
16842 if (GET_CODE (op0) == MEM)
16843 op1 = force_reg (Pmode, op1);
16844 else
16846 rtx temp = op0;
16847 if (GET_CODE (temp) != REG)
16848 temp = gen_reg_rtx (Pmode);
16849 temp = legitimize_pic_address (op1, temp);
16850 if (temp == op0)
16851 return;
16852 op1 = temp;
16854 /* dynamic-no-pic */
16855 #endif
16857 else
16859 if (MEM_P (op0))
16860 op1 = force_reg (mode, op1);
16861 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16863 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16864 op1 = legitimize_pic_address (op1, reg);
16865 if (op0 == op1)
16866 return;
16867 op1 = convert_to_mode (mode, op1, 1);
16871 else
16873 if (MEM_P (op0)
16874 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16875 || !push_operand (op0, mode))
16876 && MEM_P (op1))
16877 op1 = force_reg (mode, op1);
16879 if (push_operand (op0, mode)
16880 && ! general_no_elim_operand (op1, mode))
16881 op1 = copy_to_mode_reg (mode, op1);
16883 /* Force large constants in 64-bit compilation into a register
16884 to get them CSEd. */
16885 if (can_create_pseudo_p ()
16886 && (mode == DImode) && TARGET_64BIT
16887 && immediate_operand (op1, mode)
16888 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16889 && !register_operand (op0, mode)
16890 && optimize)
16891 op1 = copy_to_mode_reg (mode, op1);
16893 if (can_create_pseudo_p ()
16894 && FLOAT_MODE_P (mode)
16895 && GET_CODE (op1) == CONST_DOUBLE)
16897 /* If we are loading a floating point constant to a register,
16898 force the value to memory now, since we'll get better code
16899 out the back end. */
16901 op1 = validize_mem (force_const_mem (mode, op1));
16902 if (!register_operand (op0, mode))
16904 rtx temp = gen_reg_rtx (mode);
16905 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16906 emit_move_insn (op0, temp);
16907 return;
16912 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16915 void
16916 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16918 rtx op0 = operands[0], op1 = operands[1];
16919 unsigned int align = GET_MODE_ALIGNMENT (mode);
16921 if (push_operand (op0, VOIDmode))
16922 op0 = emit_move_resolve_push (mode, op0);
16924 /* Force constants other than zero into memory. We do not know how
16925 the instructions used to build constants modify the upper 64 bits
16926 of the register; once we have that information we may be able
16927 to handle some of them more efficiently. */
16928 if (can_create_pseudo_p ()
16929 && register_operand (op0, mode)
16930 && (CONSTANT_P (op1)
16931 || (GET_CODE (op1) == SUBREG
16932 && CONSTANT_P (SUBREG_REG (op1))))
16933 && !standard_sse_constant_p (op1))
16934 op1 = validize_mem (force_const_mem (mode, op1));
16936 /* We need to check memory alignment for SSE mode since an attribute
16937 can make operands unaligned. */
16938 if (can_create_pseudo_p ()
16939 && SSE_REG_MODE_P (mode)
16940 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16941 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16943 rtx tmp[2];
16945 /* ix86_expand_vector_move_misalign() does not like constants ... */
16946 if (CONSTANT_P (op1)
16947 || (GET_CODE (op1) == SUBREG
16948 && CONSTANT_P (SUBREG_REG (op1))))
16949 op1 = validize_mem (force_const_mem (mode, op1));
16951 /* ... nor both arguments in memory. */
16952 if (!register_operand (op0, mode)
16953 && !register_operand (op1, mode))
16954 op1 = force_reg (mode, op1);
16956 tmp[0] = op0; tmp[1] = op1;
16957 ix86_expand_vector_move_misalign (mode, tmp);
16958 return;
16961 /* Make operand1 a register if it isn't already. */
16962 if (can_create_pseudo_p ()
16963 && !register_operand (op0, mode)
16964 && !register_operand (op1, mode))
16966 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16967 return;
16970 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16973 /* Split 32-byte AVX unaligned load and store if needed. */
16975 static void
16976 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16978 rtx m;
16979 rtx (*extract) (rtx, rtx, rtx);
16980 rtx (*load_unaligned) (rtx, rtx);
16981 rtx (*store_unaligned) (rtx, rtx);
16982 enum machine_mode mode;
16984 switch (GET_MODE (op0))
16986 default:
16987 gcc_unreachable ();
16988 case V32QImode:
16989 extract = gen_avx_vextractf128v32qi;
16990 load_unaligned = gen_avx_loaddquv32qi;
16991 store_unaligned = gen_avx_storedquv32qi;
16992 mode = V16QImode;
16993 break;
16994 case V8SFmode:
16995 extract = gen_avx_vextractf128v8sf;
16996 load_unaligned = gen_avx_loadups256;
16997 store_unaligned = gen_avx_storeups256;
16998 mode = V4SFmode;
16999 break;
17000 case V4DFmode:
17001 extract = gen_avx_vextractf128v4df;
17002 load_unaligned = gen_avx_loadupd256;
17003 store_unaligned = gen_avx_storeupd256;
17004 mode = V2DFmode;
17005 break;
17008 if (MEM_P (op1))
17010 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17012 rtx r = gen_reg_rtx (mode);
17013 m = adjust_address (op1, mode, 0);
17014 emit_move_insn (r, m);
17015 m = adjust_address (op1, mode, 16);
17016 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17017 emit_move_insn (op0, r);
17019 /* Normal *mov<mode>_internal pattern will handle
17020 unaligned loads just fine if misaligned_operand
17021 is true, and without the UNSPEC it can be combined
17022 with arithmetic instructions. */
17023 else if (misaligned_operand (op1, GET_MODE (op1)))
17024 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17025 else
17026 emit_insn (load_unaligned (op0, op1));
17028 else if (MEM_P (op0))
17030 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17032 m = adjust_address (op0, mode, 0);
17033 emit_insn (extract (m, op1, const0_rtx));
17034 m = adjust_address (op0, mode, 16);
17035 emit_insn (extract (m, op1, const1_rtx));
17037 else
17038 emit_insn (store_unaligned (op0, op1));
17040 else
17041 gcc_unreachable ();
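/* Rough example of the split (assumed typical assembly, not taken from
   the insn patterns): with TARGET_AVX256_SPLIT_UNALIGNED_LOAD a 32-byte
   V8SF load becomes something like
       vmovups     (mem), %xmm0
       vinsertf128 $1, 16(mem), %ymm0, %ymm0
   and with TARGET_AVX256_SPLIT_UNALIGNED_STORE a store becomes roughly
       vmovups      %xmm0, (mem)
       vextractf128 $1, %ymm0, 16(mem)  */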
17044 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17045 straight to ix86_expand_vector_move. */
17046 /* Code generation for scalar reg-reg moves of single and double precision data:
17047 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17048 movaps reg, reg
17049 else
17050 movss reg, reg
17051 if (x86_sse_partial_reg_dependency == true)
17052 movapd reg, reg
17053 else
17054 movsd reg, reg
17056 Code generation for scalar loads of double precision data:
17057 if (x86_sse_split_regs == true)
17058 movlpd mem, reg (gas syntax)
17059 else
17060 movsd mem, reg
17062 Code generation for unaligned packed loads of single precision data
17063 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17064 if (x86_sse_unaligned_move_optimal)
17065 movups mem, reg
17067 if (x86_sse_partial_reg_dependency == true)
17069 xorps reg, reg
17070 movlps mem, reg
17071 movhps mem+8, reg
17073 else
17075 movlps mem, reg
17076 movhps mem+8, reg
17079 Code generation for unaligned packed loads of double precision data
17080 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17081 if (x86_sse_unaligned_move_optimal)
17082 movupd mem, reg
17084 if (x86_sse_split_regs == true)
17086 movlpd mem, reg
17087 movhpd mem+8, reg
17089 else
17091 movsd mem, reg
17092 movhpd mem+8, reg
17096 void
17097 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17099 rtx op0, op1, orig_op0 = NULL_RTX, m;
17100 rtx (*load_unaligned) (rtx, rtx);
17101 rtx (*store_unaligned) (rtx, rtx);
17103 op0 = operands[0];
17104 op1 = operands[1];
17106 if (GET_MODE_SIZE (mode) == 64)
17108 switch (GET_MODE_CLASS (mode))
17110 case MODE_VECTOR_INT:
17111 case MODE_INT:
17112 if (GET_MODE (op0) != V16SImode)
17114 if (!MEM_P (op0))
17116 orig_op0 = op0;
17117 op0 = gen_reg_rtx (V16SImode);
17119 else
17120 op0 = gen_lowpart (V16SImode, op0);
17122 op1 = gen_lowpart (V16SImode, op1);
17123 /* FALLTHRU */
17125 case MODE_VECTOR_FLOAT:
17126 switch (GET_MODE (op0))
17128 default:
17129 gcc_unreachable ();
17130 case V16SImode:
17131 load_unaligned = gen_avx512f_loaddquv16si;
17132 store_unaligned = gen_avx512f_storedquv16si;
17133 break;
17134 case V16SFmode:
17135 load_unaligned = gen_avx512f_loadups512;
17136 store_unaligned = gen_avx512f_storeups512;
17137 break;
17138 case V8DFmode:
17139 load_unaligned = gen_avx512f_loadupd512;
17140 store_unaligned = gen_avx512f_storeupd512;
17141 break;
17144 if (MEM_P (op1))
17145 emit_insn (load_unaligned (op0, op1));
17146 else if (MEM_P (op0))
17147 emit_insn (store_unaligned (op0, op1));
17148 else
17149 gcc_unreachable ();
17150 if (orig_op0)
17151 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17152 break;
17154 default:
17155 gcc_unreachable ();
17158 return;
17161 if (TARGET_AVX
17162 && GET_MODE_SIZE (mode) == 32)
17164 switch (GET_MODE_CLASS (mode))
17166 case MODE_VECTOR_INT:
17167 case MODE_INT:
17168 if (GET_MODE (op0) != V32QImode)
17170 if (!MEM_P (op0))
17172 orig_op0 = op0;
17173 op0 = gen_reg_rtx (V32QImode);
17175 else
17176 op0 = gen_lowpart (V32QImode, op0);
17178 op1 = gen_lowpart (V32QImode, op1);
17179 /* FALLTHRU */
17181 case MODE_VECTOR_FLOAT:
17182 ix86_avx256_split_vector_move_misalign (op0, op1);
17183 if (orig_op0)
17184 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17185 break;
17187 default:
17188 gcc_unreachable ();
17191 return;
17194 if (MEM_P (op1))
17196 /* Normal *mov<mode>_internal pattern will handle
17197 unaligned loads just fine if misaligned_operand
17198 is true, and without the UNSPEC it can be combined
17199 with arithmetic instructions. */
17200 if (TARGET_AVX
17201 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17202 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17203 && misaligned_operand (op1, GET_MODE (op1)))
17204 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17205 /* ??? If we have typed data, then it would appear that using
17206 movdqu is the only way to get unaligned data loaded with
17207 integer type. */
17208 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17210 if (GET_MODE (op0) != V16QImode)
17212 orig_op0 = op0;
17213 op0 = gen_reg_rtx (V16QImode);
17215 op1 = gen_lowpart (V16QImode, op1);
17216 /* We will eventually emit movups based on insn attributes. */
17217 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17218 if (orig_op0)
17219 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17221 else if (TARGET_SSE2 && mode == V2DFmode)
17223 rtx zero;
17225 if (TARGET_AVX
17226 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17227 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17228 || optimize_insn_for_size_p ())
17230 /* We will eventually emit movups based on insn attributes. */
17231 emit_insn (gen_sse2_loadupd (op0, op1));
17232 return;
17235 /* When SSE registers are split into halves, we can avoid
17236 writing to the top half twice. */
17237 if (TARGET_SSE_SPLIT_REGS)
17239 emit_clobber (op0);
17240 zero = op0;
17242 else
17244 /* ??? Not sure about the best option for the Intel chips.
17245 The following would seem to satisfy; the register is
17246 entirely cleared, breaking the dependency chain. We
17247 then store to the upper half, with a dependency depth
17248 of one. A rumor has it that Intel recommends two movsd
17249 followed by an unpacklpd, but this is unconfirmed. And
17250 given that the dependency depth of the unpacklpd would
17251 still be one, I'm not sure why this would be better. */
17252 zero = CONST0_RTX (V2DFmode);
17255 m = adjust_address (op1, DFmode, 0);
17256 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17257 m = adjust_address (op1, DFmode, 8);
17258 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17260 else
17262 rtx t;
17264 if (TARGET_AVX
17265 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17266 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17267 || optimize_insn_for_size_p ())
17269 if (GET_MODE (op0) != V4SFmode)
17271 orig_op0 = op0;
17272 op0 = gen_reg_rtx (V4SFmode);
17274 op1 = gen_lowpart (V4SFmode, op1);
17275 emit_insn (gen_sse_loadups (op0, op1));
17276 if (orig_op0)
17277 emit_move_insn (orig_op0,
17278 gen_lowpart (GET_MODE (orig_op0), op0));
17279 return;
17282 if (mode != V4SFmode)
17283 t = gen_reg_rtx (V4SFmode);
17284 else
17285 t = op0;
17287 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17288 emit_move_insn (t, CONST0_RTX (V4SFmode));
17289 else
17290 emit_clobber (t);
17292 m = adjust_address (op1, V2SFmode, 0);
17293 emit_insn (gen_sse_loadlps (t, t, m));
17294 m = adjust_address (op1, V2SFmode, 8);
17295 emit_insn (gen_sse_loadhps (t, t, m));
17296 if (mode != V4SFmode)
17297 emit_move_insn (op0, gen_lowpart (mode, t));
17300 else if (MEM_P (op0))
17302 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17304 op0 = gen_lowpart (V16QImode, op0);
17305 op1 = gen_lowpart (V16QImode, op1);
17306 /* We will eventually emit movups based on insn attributes. */
17307 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17309 else if (TARGET_SSE2 && mode == V2DFmode)
17311 if (TARGET_AVX
17312 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17313 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17314 || optimize_insn_for_size_p ())
17315 /* We will eventually emit movups based on insn attributes. */
17316 emit_insn (gen_sse2_storeupd (op0, op1));
17317 else
17319 m = adjust_address (op0, DFmode, 0);
17320 emit_insn (gen_sse2_storelpd (m, op1));
17321 m = adjust_address (op0, DFmode, 8);
17322 emit_insn (gen_sse2_storehpd (m, op1));
17325 else
17327 if (mode != V4SFmode)
17328 op1 = gen_lowpart (V4SFmode, op1);
17330 if (TARGET_AVX
17331 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17332 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17333 || optimize_insn_for_size_p ())
17335 op0 = gen_lowpart (V4SFmode, op0);
17336 emit_insn (gen_sse_storeups (op0, op1));
17338 else
17340 m = adjust_address (op0, V2SFmode, 0);
17341 emit_insn (gen_sse_storelps (m, op1));
17342 m = adjust_address (op0, V2SFmode, 8);
17343 emit_insn (gen_sse_storehps (m, op1));
17347 else
17348 gcc_unreachable ();
17351 /* Helper function of ix86_fixup_binary_operands to canonicalize
17352 operand order. Returns true if the operands should be swapped. */
17354 static bool
17355 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17356 rtx operands[])
17358 rtx dst = operands[0];
17359 rtx src1 = operands[1];
17360 rtx src2 = operands[2];
17362 /* If the operation is not commutative, we can't do anything. */
17363 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17364 return false;
17366 /* Highest priority is that src1 should match dst. */
17367 if (rtx_equal_p (dst, src1))
17368 return false;
17369 if (rtx_equal_p (dst, src2))
17370 return true;
17372 /* Next highest priority is that immediate constants come second. */
17373 if (immediate_operand (src2, mode))
17374 return false;
17375 if (immediate_operand (src1, mode))
17376 return true;
17378 /* Lowest priority is that memory references should come second. */
17379 if (MEM_P (src2))
17380 return false;
17381 if (MEM_P (src1))
17382 return true;
17384 return false;
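/* Illustrative example (hypothetical operands): for a commutative PLUS
   with dst == %eax, src1 == a memory operand and src2 == %eax, the
   operands are swapped so that src1 matches dst, allowing the single
   "add mem, %eax" form instead of an extra copy.  */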
17388 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17389 destination to use for the operation. If different from the true
17390 destination in operands[0], a copy operation will be required. */
17393 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17394 rtx operands[])
17396 rtx dst = operands[0];
17397 rtx src1 = operands[1];
17398 rtx src2 = operands[2];
17400 /* Canonicalize operand order. */
17401 if (ix86_swap_binary_operands_p (code, mode, operands))
17403 rtx temp;
17405 /* It is invalid to swap operands of different modes. */
17406 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17408 temp = src1;
17409 src1 = src2;
17410 src2 = temp;
17413 /* Both source operands cannot be in memory. */
17414 if (MEM_P (src1) && MEM_P (src2))
17416 /* Optimization: Only read from memory once. */
17417 if (rtx_equal_p (src1, src2))
17419 src2 = force_reg (mode, src2);
17420 src1 = src2;
17422 else if (rtx_equal_p (dst, src1))
17423 src2 = force_reg (mode, src2);
17424 else
17425 src1 = force_reg (mode, src1);
17428 /* If the destination is memory, and we do not have matching source
17429 operands, do things in registers. */
17430 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17431 dst = gen_reg_rtx (mode);
17433 /* Source 1 cannot be a constant. */
17434 if (CONSTANT_P (src1))
17435 src1 = force_reg (mode, src1);
17437 /* Source 1 cannot be a non-matching memory. */
17438 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17439 src1 = force_reg (mode, src1);
17441 /* Improve address combine. */
17442 if (code == PLUS
17443 && GET_MODE_CLASS (mode) == MODE_INT
17444 && MEM_P (src2))
17445 src2 = force_reg (mode, src2);
17447 operands[1] = src1;
17448 operands[2] = src2;
17449 return dst;
17452 /* Similarly, but assume that the destination has already been
17453 set up properly. */
17455 void
17456 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17457 enum machine_mode mode, rtx operands[])
17459 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17460 gcc_assert (dst == operands[0]);
17463 /* Attempt to expand a binary operator. Make the expansion closer to the
17464 actual machine than just general_operand, which would allow 3 separate
17465 memory references (one output, two inputs) in a single insn. */
17467 void
17468 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17469 rtx operands[])
17471 rtx src1, src2, dst, op, clob;
17473 dst = ix86_fixup_binary_operands (code, mode, operands);
17474 src1 = operands[1];
17475 src2 = operands[2];
17477 /* Emit the instruction. */
17479 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17480 if (reload_in_progress)
17482 /* Reload doesn't know about the flags register, and doesn't know that
17483 it doesn't want to clobber it. We can only do this with PLUS. */
17484 gcc_assert (code == PLUS);
17485 emit_insn (op);
17487 else if (reload_completed
17488 && code == PLUS
17489 && !rtx_equal_p (dst, src1))
17491 /* This is going to be an LEA; avoid splitting it later. */
17492 emit_insn (op);
17494 else
17496 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17497 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17500 /* Fix up the destination if needed. */
17501 if (dst != operands[0])
17502 emit_move_insn (operands[0], dst);
17505 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17506 the given OPERANDS. */
17508 void
17509 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17510 rtx operands[])
17512 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17513 if (GET_CODE (operands[1]) == SUBREG)
17515 op1 = operands[1];
17516 op2 = operands[2];
17518 else if (GET_CODE (operands[2]) == SUBREG)
17520 op1 = operands[2];
17521 op2 = operands[1];
17523 /* Optimize (__m128i) d | (__m128i) e and similar code
17524 when d and e are float vectors into float vector logical
17525 insn. In C/C++ without using intrinsics there is no other way
17526 to express vector logical operation on float vectors than
17527 to cast them temporarily to integer vectors. */
17528 if (op1
17529 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17530 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17531 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17532 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17533 && SUBREG_BYTE (op1) == 0
17534 && (GET_CODE (op2) == CONST_VECTOR
17535 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17536 && SUBREG_BYTE (op2) == 0))
17537 && can_create_pseudo_p ())
17539 rtx dst;
17540 switch (GET_MODE (SUBREG_REG (op1)))
17542 case V4SFmode:
17543 case V8SFmode:
17544 case V16SFmode:
17545 case V2DFmode:
17546 case V4DFmode:
17547 case V8DFmode:
17548 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17549 if (GET_CODE (op2) == CONST_VECTOR)
17551 op2 = gen_lowpart (GET_MODE (dst), op2);
17552 op2 = force_reg (GET_MODE (dst), op2);
17554 else
17556 op1 = operands[1];
17557 op2 = SUBREG_REG (operands[2]);
17558 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17559 op2 = force_reg (GET_MODE (dst), op2);
17561 op1 = SUBREG_REG (op1);
17562 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17563 op1 = force_reg (GET_MODE (dst), op1);
17564 emit_insn (gen_rtx_SET (VOIDmode, dst,
17565 gen_rtx_fmt_ee (code, GET_MODE (dst),
17566 op1, op2)));
17567 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17568 return;
17569 default:
17570 break;
17573 if (!nonimmediate_operand (operands[1], mode))
17574 operands[1] = force_reg (mode, operands[1]);
17575 if (!nonimmediate_operand (operands[2], mode))
17576 operands[2] = force_reg (mode, operands[2]);
17577 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17578 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17579 gen_rtx_fmt_ee (code, mode, operands[1],
17580 operands[2])));
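/* For illustration (source-level sketch of the case described above):
   GNU C code such as

     __m128 a, b;
     __m128i r = (__m128i) a | (__m128i) b;

   casts float vectors to integer vectors only to express the logical OR;
   the transformation above lets it be emitted as a single float-vector
   insn (e.g. orps) instead of bouncing the values through integer-vector
   form.  */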
17583 /* Return TRUE or FALSE depending on whether the binary operator meets the
17584 appropriate constraints. */
17586 bool
17587 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17588 rtx operands[3])
17590 rtx dst = operands[0];
17591 rtx src1 = operands[1];
17592 rtx src2 = operands[2];
17594 /* Both source operands cannot be in memory. */
17595 if (MEM_P (src1) && MEM_P (src2))
17596 return false;
17598 /* Canonicalize operand order for commutative operators. */
17599 if (ix86_swap_binary_operands_p (code, mode, operands))
17601 rtx temp = src1;
17602 src1 = src2;
17603 src2 = temp;
17606 /* If the destination is memory, we must have a matching source operand. */
17607 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17608 return false;
17610 /* Source 1 cannot be a constant. */
17611 if (CONSTANT_P (src1))
17612 return false;
17614 /* Source 1 cannot be a non-matching memory. */
17615 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17616 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17617 return (code == AND
17618 && (mode == HImode
17619 || mode == SImode
17620 || (TARGET_64BIT && mode == DImode))
17621 && satisfies_constraint_L (src2));
17623 return true;
17626 /* Attempt to expand a unary operator. Make the expansion closer to the
17627 actual machine than just general_operand, which would allow 2 separate
17628 memory references (one output, one input) in a single insn. */
17630 void
17631 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17632 rtx operands[])
17634 int matching_memory;
17635 rtx src, dst, op, clob;
17637 dst = operands[0];
17638 src = operands[1];
17640 /* If the destination is memory, and we do not have matching source
17641 operands, do things in registers. */
17642 matching_memory = 0;
17643 if (MEM_P (dst))
17645 if (rtx_equal_p (dst, src))
17646 matching_memory = 1;
17647 else
17648 dst = gen_reg_rtx (mode);
17651 /* When source operand is memory, destination must match. */
17652 if (MEM_P (src) && !matching_memory)
17653 src = force_reg (mode, src);
17655 /* Emit the instruction. */
17657 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17658 if (reload_in_progress || code == NOT)
17660 /* Reload doesn't know about the flags register, and doesn't know that
17661 it doesn't want to clobber it. */
17662 gcc_assert (code == NOT);
17663 emit_insn (op);
17665 else
17667 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17668 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17671 /* Fix up the destination if needed. */
17672 if (dst != operands[0])
17673 emit_move_insn (operands[0], dst);
17676 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17677 divisor are within the range [0-255]. */
17679 void
17680 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17681 bool signed_p)
17683 rtx end_label, qimode_label;
17684 rtx insn, div, mod;
17685 rtx scratch, tmp0, tmp1, tmp2;
17686 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17687 rtx (*gen_zero_extend) (rtx, rtx);
17688 rtx (*gen_test_ccno_1) (rtx, rtx);
17690 switch (mode)
17692 case SImode:
17693 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17694 gen_test_ccno_1 = gen_testsi_ccno_1;
17695 gen_zero_extend = gen_zero_extendqisi2;
17696 break;
17697 case DImode:
17698 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17699 gen_test_ccno_1 = gen_testdi_ccno_1;
17700 gen_zero_extend = gen_zero_extendqidi2;
17701 break;
17702 default:
17703 gcc_unreachable ();
17706 end_label = gen_label_rtx ();
17707 qimode_label = gen_label_rtx ();
17709 scratch = gen_reg_rtx (mode);
17711 /* Use 8bit unsigned divmod if dividend and divisor are within
17712 the range [0-255]. */
17713 emit_move_insn (scratch, operands[2]);
17714 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17715 scratch, 1, OPTAB_DIRECT);
17716 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
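/* Illustrative arithmetic (worked example, not from the original
   comments): the IOR of dividend and divisor has a bit set at or above
   bit 8 exactly when one of them is outside [0, 255], so a single test
   against -0x100 (0xffffff00) decides the path.  E.g. 200 | 37 = 0xed
   and 0xed & 0xffffff00 == 0, so the 8-bit divide is taken; 300 | 5 =
   0x12d and 0x12d & 0xffffff00 != 0, so the full divide is used.  */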
17717 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17718 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17719 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17720 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17721 pc_rtx);
17722 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17723 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17724 JUMP_LABEL (insn) = qimode_label;
17726 /* Generate original signed/unsigned divmod. */
17727 div = gen_divmod4_1 (operands[0], operands[1],
17728 operands[2], operands[3]);
17729 emit_insn (div);
17731 /* Branch to the end. */
17732 emit_jump_insn (gen_jump (end_label));
17733 emit_barrier ();
17735 /* Generate 8bit unsigned divide. */
17736 emit_label (qimode_label);
17737 /* Don't use operands[0] for result of 8bit divide since not all
17738 registers support QImode ZERO_EXTRACT. */
17739 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17740 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17741 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17742 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17744 if (signed_p)
17746 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17747 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17749 else
17751 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17752 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17755 /* Extract remainder from AH. */
17756 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17757 if (REG_P (operands[1]))
17758 insn = emit_move_insn (operands[1], tmp1);
17759 else
17761 /* Need a new scratch register since the old one has the result
17762 of the 8bit divide. */
17763 scratch = gen_reg_rtx (mode);
17764 emit_move_insn (scratch, tmp1);
17765 insn = emit_move_insn (operands[1], scratch);
17767 set_unique_reg_note (insn, REG_EQUAL, mod);
17769 /* Zero extend quotient from AL. */
17770 tmp1 = gen_lowpart (QImode, tmp0);
17771 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17772 set_unique_reg_note (insn, REG_EQUAL, div);
17774 emit_label (end_label);
17777 /* Whether it is OK to emit CFI directives when emitting asm code. */
17779 bool
17780 ix86_emit_cfi ()
17782 return dwarf2out_do_cfi_asm ();
17785 #define LEA_MAX_STALL (3)
17786 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17788 /* Increase given DISTANCE in half-cycles according to
17789 dependencies between PREV and NEXT instructions.
17790 Add 1 half-cycle if there is no dependency and
17791 go to the next cycle if there is some dependency. */
17793 static unsigned int
17794 increase_distance (rtx prev, rtx next, unsigned int distance)
17796 df_ref def, use;
17798 if (!prev || !next)
17799 return distance + (distance & 1) + 2;
17801 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17802 return distance + 1;
17804 FOR_EACH_INSN_USE (use, next)
17805 FOR_EACH_INSN_DEF (def, prev)
17806 if (!DF_REF_IS_ARTIFICIAL (def)
17807 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17808 return distance + (distance & 1) + 2;
17810 return distance + 1;
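/* Note on the arithmetic above (illustrative): "distance + (distance & 1)
   + 2" rounds DISTANCE up to a whole cycle (an even number of
   half-cycles) and then adds one full cycle, matching "go to the next
   cycle if there is some dependency"; the independent case simply adds
   one half-cycle.  */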
17813 /* Function checks if instruction INSN defines register number
17814 REGNO1 or REGNO2. */
17816 static bool
17817 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17818 rtx insn)
17820 df_ref def;
17822 FOR_EACH_INSN_DEF (def, insn)
17823 if (DF_REF_REG_DEF_P (def)
17824 && !DF_REF_IS_ARTIFICIAL (def)
17825 && (regno1 == DF_REF_REGNO (def)
17826 || regno2 == DF_REF_REGNO (def)))
17827 return true;
17829 return false;
17832 /* Function checks if instruction INSN uses register number
17833 REGNO as a part of address expression. */
17835 static bool
17836 insn_uses_reg_mem (unsigned int regno, rtx insn)
17838 df_ref use;
17840 FOR_EACH_INSN_USE (use, insn)
17841 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17842 return true;
17844 return false;
17847 /* Search backward for non-agu definition of register number REGNO1
17848 or register number REGNO2 in basic block starting from instruction
17849 START up to head of basic block or instruction INSN.
17851 Function puts true value into *FOUND var if definition was found
17852 and false otherwise.
17854 Distance in half-cycles between START and found instruction or head
17855 of BB is added to DISTANCE and returned. */
17857 static int
17858 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17859 rtx insn, int distance,
17860 rtx start, bool *found)
17862 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17863 rtx prev = start;
17864 rtx next = NULL;
17866 *found = false;
17868 while (prev
17869 && prev != insn
17870 && distance < LEA_SEARCH_THRESHOLD)
17872 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17874 distance = increase_distance (prev, next, distance);
17875 if (insn_defines_reg (regno1, regno2, prev))
17877 if (recog_memoized (prev) < 0
17878 || get_attr_type (prev) != TYPE_LEA)
17880 *found = true;
17881 return distance;
17885 next = prev;
17887 if (prev == BB_HEAD (bb))
17888 break;
17890 prev = PREV_INSN (prev);
17893 return distance;
17896 /* Search backward for non-agu definition of register number REGNO1
17897 or register number REGNO2 in INSN's basic block until
17898 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17899 2. Reach neighbour BBs boundary, or
17900 3. Reach agu definition.
17901 Returns the distance between the non-agu definition point and INSN.
17902 If no definition point, returns -1. */
17904 static int
17905 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17906 rtx insn)
17908 basic_block bb = BLOCK_FOR_INSN (insn);
17909 int distance = 0;
17910 bool found = false;
17912 if (insn != BB_HEAD (bb))
17913 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17914 distance, PREV_INSN (insn),
17915 &found);
17917 if (!found && distance < LEA_SEARCH_THRESHOLD)
17919 edge e;
17920 edge_iterator ei;
17921 bool simple_loop = false;
17923 FOR_EACH_EDGE (e, ei, bb->preds)
17924 if (e->src == bb)
17926 simple_loop = true;
17927 break;
17930 if (simple_loop)
17931 distance = distance_non_agu_define_in_bb (regno1, regno2,
17932 insn, distance,
17933 BB_END (bb), &found);
17934 else
17936 int shortest_dist = -1;
17937 bool found_in_bb = false;
17939 FOR_EACH_EDGE (e, ei, bb->preds)
17941 int bb_dist
17942 = distance_non_agu_define_in_bb (regno1, regno2,
17943 insn, distance,
17944 BB_END (e->src),
17945 &found_in_bb);
17946 if (found_in_bb)
17948 if (shortest_dist < 0)
17949 shortest_dist = bb_dist;
17950 else if (bb_dist > 0)
17951 shortest_dist = MIN (bb_dist, shortest_dist);
17953 found = true;
17957 distance = shortest_dist;
17961 /* get_attr_type may modify recog data. We want to make sure
17962 that recog data is valid for instruction INSN, on which
17963 distance_non_agu_define is called. INSN is unchanged here. */
17964 extract_insn_cached (insn);
17966 if (!found)
17967 return -1;
17969 return distance >> 1;
17972 /* Return the distance in half-cycles between INSN and the next
17973 insn that uses register number REGNO in a memory address, added
17974 to DISTANCE. Return -1 if REGNO is set.
17976 Put true value into *FOUND if register usage was found and
17977 false otherwise.
17978 Put true value into *REDEFINED if register redefinition was
17979 found and false otherwise. */
17981 static int
17982 distance_agu_use_in_bb (unsigned int regno,
17983 rtx insn, int distance, rtx start,
17984 bool *found, bool *redefined)
17986 basic_block bb = NULL;
17987 rtx next = start;
17988 rtx prev = NULL;
17990 *found = false;
17991 *redefined = false;
17993 if (start != NULL_RTX)
17995 bb = BLOCK_FOR_INSN (start);
17996 if (start != BB_HEAD (bb))
17997 /* If insn and start belong to the same bb, set prev to insn,
17998 so the call to increase_distance will increase the distance
17999 between insns by 1. */
18000 prev = insn;
18003 while (next
18004 && next != insn
18005 && distance < LEA_SEARCH_THRESHOLD)
18007 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18009 distance = increase_distance(prev, next, distance);
18010 if (insn_uses_reg_mem (regno, next))
18012 /* Return DISTANCE if OP0 is used in memory
18013 address in NEXT. */
18014 *found = true;
18015 return distance;
18018 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18020 /* Return -1 if OP0 is set in NEXT. */
18021 *redefined = true;
18022 return -1;
18025 prev = next;
18028 if (next == BB_END (bb))
18029 break;
18031 next = NEXT_INSN (next);
18034 return distance;
18037 /* Return the distance between INSN and the next insn that uses
18038 register number REGNO0 in a memory address. Return -1 if no such
18039 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18041 static int
18042 distance_agu_use (unsigned int regno0, rtx insn)
18044 basic_block bb = BLOCK_FOR_INSN (insn);
18045 int distance = 0;
18046 bool found = false;
18047 bool redefined = false;
18049 if (insn != BB_END (bb))
18050 distance = distance_agu_use_in_bb (regno0, insn, distance,
18051 NEXT_INSN (insn),
18052 &found, &redefined);
18054 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18056 edge e;
18057 edge_iterator ei;
18058 bool simple_loop = false;
18060 FOR_EACH_EDGE (e, ei, bb->succs)
18061 if (e->dest == bb)
18063 simple_loop = true;
18064 break;
18067 if (simple_loop)
18068 distance = distance_agu_use_in_bb (regno0, insn,
18069 distance, BB_HEAD (bb),
18070 &found, &redefined);
18071 else
18073 int shortest_dist = -1;
18074 bool found_in_bb = false;
18075 bool redefined_in_bb = false;
18077 FOR_EACH_EDGE (e, ei, bb->succs)
18079 int bb_dist
18080 = distance_agu_use_in_bb (regno0, insn,
18081 distance, BB_HEAD (e->dest),
18082 &found_in_bb, &redefined_in_bb);
18083 if (found_in_bb)
18085 if (shortest_dist < 0)
18086 shortest_dist = bb_dist;
18087 else if (bb_dist > 0)
18088 shortest_dist = MIN (bb_dist, shortest_dist);
18090 found = true;
18094 distance = shortest_dist;
18098 if (!found || redefined)
18099 return -1;
18101 return distance >> 1;
18104 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18105 there is a dilemma of choosing LEA or ADD.
18106 Negative value: ADD is preferred over LEA
18107 Zero: Neutral
18108 Positive value: LEA is preferred over ADD */
18109 #define IX86_LEA_PRIORITY 0
18111 /* Return true if using the lea INSN has a performance advantage
18112 over a sequence of instructions. The instruction sequence has
18113 SPLIT_COST cycles higher latency than the lea latency. */
18115 static bool
18116 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18117 unsigned int regno2, int split_cost, bool has_scale)
18119 int dist_define, dist_use;
18121 /* For Silvermont if using a 2-source or 3-source LEA for
18122 non-destructive destination purposes, or due to wanting
18123 ability to use SCALE, the use of LEA is justified. */
18124 if (TARGET_SILVERMONT || TARGET_INTEL)
18126 if (has_scale)
18127 return true;
18128 if (split_cost < 1)
18129 return false;
18130 if (regno0 == regno1 || regno0 == regno2)
18131 return false;
18132 return true;
18135 dist_define = distance_non_agu_define (regno1, regno2, insn);
18136 dist_use = distance_agu_use (regno0, insn);
18138 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18140 /* If there is no non-AGU operand definition, no AGU
18141 operand usage and the split cost is 0, then both the lea
18142 and non-lea variants have the same priority. Currently
18143 we prefer lea for 64-bit code and non-lea for 32-bit
18144 code. */
18145 if (dist_use < 0 && split_cost == 0)
18146 return TARGET_64BIT || IX86_LEA_PRIORITY;
18147 else
18148 return true;
18151 /* With a longer definition distance, lea is preferable.
18152 Here we adjust it to take into account the splitting cost and
18153 lea priority. */
18154 dist_define += split_cost + IX86_LEA_PRIORITY;
18156 /* If there is no use in a memory address then we just check
18157 that the split cost exceeds the AGU stall. */
18158 if (dist_use < 0)
18159 return dist_define > LEA_MAX_STALL;
18161 /* If this insn has both backward non-agu dependence and forward
18162 agu dependence, the one with short distance takes effect. */
18163 return dist_define >= dist_use;
18166 /* Return true if it is legal to clobber flags by INSN and
18167 false otherwise. */
18169 static bool
18170 ix86_ok_to_clobber_flags (rtx insn)
18172 basic_block bb = BLOCK_FOR_INSN (insn);
18173 df_ref use;
18174 bitmap live;
18176 while (insn)
18178 if (NONDEBUG_INSN_P (insn))
18180 FOR_EACH_INSN_USE (use, insn)
18181 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18182 return false;
18184 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18185 return true;
18188 if (insn == BB_END (bb))
18189 break;
18191 insn = NEXT_INSN (insn);
18194 live = df_get_live_out(bb);
18195 return !REGNO_REG_SET_P (live, FLAGS_REG);
18198 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18199 move and add to avoid AGU stalls. */
18201 bool
18202 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18204 unsigned int regno0, regno1, regno2;
18206 /* Check if we need to optimize. */
18207 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18208 return false;
18210 /* Check it is correct to split here. */
18211 if (!ix86_ok_to_clobber_flags(insn))
18212 return false;
18214 regno0 = true_regnum (operands[0]);
18215 regno1 = true_regnum (operands[1]);
18216 regno2 = true_regnum (operands[2]);
18218 /* We need to split only adds with a non-destructive
18219 destination operand. */
18220 if (regno0 == regno1 || regno0 == regno2)
18221 return false;
18222 else
18223 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18226 /* Return true if we should emit lea instruction instead of mov
18227 instruction. */
18229 bool
18230 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18232 unsigned int regno0, regno1;
18234 /* Check if we need to optimize. */
18235 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18236 return false;
18238 /* Use lea for reg to reg moves only. */
18239 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18240 return false;
18242 regno0 = true_regnum (operands[0]);
18243 regno1 = true_regnum (operands[1]);
18245 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18248 /* Return true if we need to split lea into a sequence of
18249 instructions to avoid AGU stalls. */
18251 bool
18252 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18254 unsigned int regno0, regno1, regno2;
18255 int split_cost;
18256 struct ix86_address parts;
18257 int ok;
18259 /* Check we need to optimize. */
18260 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18261 return false;
18263 /* The "at least two components" test below might not catch simple
18264 move or zero extension insns if parts.base is non-NULL and parts.disp
18265 is const0_rtx as the only components in the address, e.g. if the
18266 register is %rbp or %r13. As this test is much cheaper and moves or
18267 zero extensions are the common case, do this check first. */
18268 if (REG_P (operands[1])
18269 || (SImode_address_operand (operands[1], VOIDmode)
18270 && REG_P (XEXP (operands[1], 0))))
18271 return false;
18273 /* Check if it is OK to split here. */
18274 if (!ix86_ok_to_clobber_flags (insn))
18275 return false;
18277 ok = ix86_decompose_address (operands[1], &parts);
18278 gcc_assert (ok);
18280 /* There should be at least two components in the address. */
18281 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18282 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18283 return false;
18285 /* We should not split into add if a non-legitimate PIC
18286 operand is used as the displacement. */
18287 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18288 return false;
18290 regno0 = true_regnum (operands[0]) ;
18291 regno1 = INVALID_REGNUM;
18292 regno2 = INVALID_REGNUM;
18294 if (parts.base)
18295 regno1 = true_regnum (parts.base);
18296 if (parts.index)
18297 regno2 = true_regnum (parts.index);
18299 split_cost = 0;
18301 /* Compute how many cycles we will add to execution time
18302 if we split the lea into a sequence of instructions. */
18303 if (parts.base || parts.index)
18305 /* Have to use a mov instruction if the non-destructive
18306 destination form is used. */
18307 if (regno1 != regno0 && regno2 != regno0)
18308 split_cost += 1;
18310 /* Have to add index to base if both exist. */
18311 if (parts.base && parts.index)
18312 split_cost += 1;
18314 /* Have to use shift and adds if scale is 2 or greater. */
18315 if (parts.scale > 1)
18317 if (regno0 != regno1)
18318 split_cost += 1;
18319 else if (regno2 == regno0)
18320 split_cost += 4;
18321 else
18322 split_cost += parts.scale;
18325 /* Have to use an add instruction with an immediate if
18326 disp is nonzero. */
18327 if (parts.disp && parts.disp != const0_rtx)
18328 split_cost += 1;
18330 /* Subtract the price of lea. */
18331 split_cost -= 1;
18334 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18335 parts.scale > 1);
18338 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18339 matches destination. RTX includes clobber of FLAGS_REG. */
18341 static void
18342 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18343 rtx dst, rtx src)
18345 rtx op, clob;
18347 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18348 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18350 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18353 /* Return true if regno1 def is nearest to the insn. */
18355 static bool
18356 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18358 rtx prev = insn;
18359 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18361 if (insn == start)
18362 return false;
18363 while (prev && prev != start)
18365 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18367 prev = PREV_INSN (prev);
18368 continue;
18370 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18371 return true;
18372 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18373 return false;
18374 prev = PREV_INSN (prev);
18377 /* None of the regs is defined in the bb. */
18378 return false;
18381 /* Split lea instructions into a sequence of instructions
18382 which are executed on ALU to avoid AGU stalls.
18383 It is assumed that it is allowed to clobber flags register
18384 at lea position. */
18386 void
18387 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18389 unsigned int regno0, regno1, regno2;
18390 struct ix86_address parts;
18391 rtx target, tmp;
18392 int ok, adds;
18394 ok = ix86_decompose_address (operands[1], &parts);
18395 gcc_assert (ok);
18397 target = gen_lowpart (mode, operands[0]);
18399 regno0 = true_regnum (target);
18400 regno1 = INVALID_REGNUM;
18401 regno2 = INVALID_REGNUM;
18403 if (parts.base)
18405 parts.base = gen_lowpart (mode, parts.base);
18406 regno1 = true_regnum (parts.base);
18409 if (parts.index)
18411 parts.index = gen_lowpart (mode, parts.index);
18412 regno2 = true_regnum (parts.index);
18415 if (parts.disp)
18416 parts.disp = gen_lowpart (mode, parts.disp);
18418 if (parts.scale > 1)
18420 /* Case r1 = r1 + ... */
18421 if (regno1 == regno0)
18423 /* If we have the case r1 = r1 + C * r2 then we
18424 would have to use multiplication, which is very
18425 expensive. Assume the cost model is wrong if we
18426 reach such a case here. */
18427 gcc_assert (regno2 != regno0);
18429 for (adds = parts.scale; adds > 0; adds--)
18430 ix86_emit_binop (PLUS, mode, target, parts.index);
18432 else
18434 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18435 if (regno0 != regno2)
18436 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18438 /* Use shift for scaling. */
18439 ix86_emit_binop (ASHIFT, mode, target,
18440 GEN_INT (exact_log2 (parts.scale)));
18442 if (parts.base)
18443 ix86_emit_binop (PLUS, mode, target, parts.base);
18445 if (parts.disp && parts.disp != const0_rtx)
18446 ix86_emit_binop (PLUS, mode, target, parts.disp);
18449 else if (!parts.base && !parts.index)
18451 gcc_assert(parts.disp);
18452 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18454 else
18456 if (!parts.base)
18458 if (regno0 != regno2)
18459 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18461 else if (!parts.index)
18463 if (regno0 != regno1)
18464 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18466 else
18468 if (regno0 == regno1)
18469 tmp = parts.index;
18470 else if (regno0 == regno2)
18471 tmp = parts.base;
18472 else
18474 rtx tmp1;
18476 /* Find better operand for SET instruction, depending
18477 on which definition is farther from the insn. */
18478 if (find_nearest_reg_def (insn, regno1, regno2))
18479 tmp = parts.index, tmp1 = parts.base;
18480 else
18481 tmp = parts.base, tmp1 = parts.index;
18483 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18485 if (parts.disp && parts.disp != const0_rtx)
18486 ix86_emit_binop (PLUS, mode, target, parts.disp);
18488 ix86_emit_binop (PLUS, mode, target, tmp1);
18489 return;
18492 ix86_emit_binop (PLUS, mode, target, tmp);
18495 if (parts.disp && parts.disp != const0_rtx)
18496 ix86_emit_binop (PLUS, mode, target, parts.disp);
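/* Worked example (illustrative; the register choices are hypothetical):
   for "lea 0x4(%rbx,%rcx,4), %rax" -- base %rbx, index %rcx, scale 4,
   disp 4, all registers distinct -- the code above emits roughly
       mov  %rcx, %rax
       shl  $2, %rax
       add  %rbx, %rax
       add  $4, %rax
   using a shift for the scale and plain adds for base and displacement.  */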
18500 /* Return true if it is ok to optimize an ADD operation to an LEA
18501 operation to avoid flag register consumption. For most processors,
18502 ADD is faster than LEA. For processors like BONNELL, if the
18503 destination register of the LEA holds an actual address which will be
18504 used soon, LEA is better and otherwise ADD is better. */
18506 bool
18507 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18509 unsigned int regno0 = true_regnum (operands[0]);
18510 unsigned int regno1 = true_regnum (operands[1]);
18511 unsigned int regno2 = true_regnum (operands[2]);
18513 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18514 if (regno0 != regno1 && regno0 != regno2)
18515 return true;
18517 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18518 return false;
18520 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18523 /* Return true if destination reg of SET_BODY is shift count of
18524 USE_BODY. */
18526 static bool
18527 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18529 rtx set_dest;
18530 rtx shift_rtx;
18531 int i;
18533 /* Retrieve destination of SET_BODY. */
18534 switch (GET_CODE (set_body))
18536 case SET:
18537 set_dest = SET_DEST (set_body);
18538 if (!set_dest || !REG_P (set_dest))
18539 return false;
18540 break;
18541 case PARALLEL:
18542 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18543 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18544 use_body))
18545 return true;
18546 default:
18547 return false;
18548 break;
18551 /* Retrieve shift count of USE_BODY. */
18552 switch (GET_CODE (use_body))
18554 case SET:
18555 shift_rtx = XEXP (use_body, 1);
18556 break;
18557 case PARALLEL:
18558 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18559 if (ix86_dep_by_shift_count_body (set_body,
18560 XVECEXP (use_body, 0, i)))
18561 return true;
18562 default:
18563 return false;
18564 break;
18567 if (shift_rtx
18568 && (GET_CODE (shift_rtx) == ASHIFT
18569 || GET_CODE (shift_rtx) == LSHIFTRT
18570 || GET_CODE (shift_rtx) == ASHIFTRT
18571 || GET_CODE (shift_rtx) == ROTATE
18572 || GET_CODE (shift_rtx) == ROTATERT))
18574 rtx shift_count = XEXP (shift_rtx, 1);
18576 /* Return true if shift count is dest of SET_BODY. */
18577 if (REG_P (shift_count))
18579 /* Add a check since this can be invoked before register
18580 allocation by the pre-reload scheduler. */
18581 if (reload_completed
18582 && true_regnum (set_dest) == true_regnum (shift_count))
18583 return true;
18584 else if (REGNO(set_dest) == REGNO(shift_count))
18585 return true;
18589 return false;
18592 /* Return true if destination reg of SET_INSN is shift count of
18593 USE_INSN. */
18595 bool
18596 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18598 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18599 PATTERN (use_insn));
18602 /* Return TRUE or FALSE depending on whether the unary operator meets the
18603 appropriate constraints. */
18605 bool
18606 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18607 enum machine_mode mode ATTRIBUTE_UNUSED,
18608 rtx operands[2])
18610 /* If one of operands is memory, source and destination must match. */
18611 if ((MEM_P (operands[0])
18612 || MEM_P (operands[1]))
18613 && ! rtx_equal_p (operands[0], operands[1]))
18614 return false;
18615 return true;
18618 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18619 are ok, keeping in mind the possible movddup alternative. */
18621 bool
18622 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18624 if (MEM_P (operands[0]))
18625 return rtx_equal_p (operands[0], operands[1 + high]);
18626 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18627 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18628 return true;
18631 /* Post-reload splitter for converting an SF or DFmode value in an
18632 SSE register into an unsigned SImode. */
18634 void
18635 ix86_split_convert_uns_si_sse (rtx operands[])
18637 enum machine_mode vecmode;
18638 rtx value, large, zero_or_two31, input, two31, x;
18640 large = operands[1];
18641 zero_or_two31 = operands[2];
18642 input = operands[3];
18643 two31 = operands[4];
18644 vecmode = GET_MODE (large);
18645 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18647 /* Load up the value into the low element. We must ensure that the other
18648 elements are valid floats -- zero is the easiest such value. */
18649 if (MEM_P (input))
18651 if (vecmode == V4SFmode)
18652 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18653 else
18654 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18656 else
18658 input = gen_rtx_REG (vecmode, REGNO (input));
18659 emit_move_insn (value, CONST0_RTX (vecmode));
18660 if (vecmode == V4SFmode)
18661 emit_insn (gen_sse_movss (value, value, input));
18662 else
18663 emit_insn (gen_sse2_movsd (value, value, input));
18666 emit_move_insn (large, two31);
18667 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18669 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18670 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18672 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18673 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18675 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18676 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18678 large = gen_rtx_REG (V4SImode, REGNO (large));
18679 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18681 x = gen_rtx_REG (V4SImode, REGNO (value));
18682 if (vecmode == V4SFmode)
18683 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18684 else
18685 emit_insn (gen_sse2_cvttpd2dq (x, value));
18686 value = x;
18688 emit_insn (gen_xorv4si3 (value, value, large));
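/* For illustration only (not part of the original file): a scalar C sketch
   of the sequence above.  The LE vector compare corresponds to ge, the
   masked 2^31 constant to adj, the signed cvttss2si/cvttsd2si to the
   (int) cast, and the final pxor restores the top bit:

     unsigned int
     fp_to_u32 (double x)
     {
       int ge = x >= 0x1.0p31;
       double adj = ge ? 0x1.0p31 : 0.0;
       int t = (int) (x - adj);
       return (unsigned int) t ^ (ge ? 0x80000000u : 0u);
     }
*/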
18691 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18692 Expects the 64-bit DImode to be supplied in a pair of integral
18693 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18694 -mfpmath=sse, !optimize_size only. */
18696 void
18697 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18699 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18700 rtx int_xmm, fp_xmm;
18701 rtx biases, exponents;
18702 rtx x;
18704 int_xmm = gen_reg_rtx (V4SImode);
18705 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18706 emit_insn (gen_movdi_to_sse (int_xmm, input));
18707 else if (TARGET_SSE_SPLIT_REGS)
18709 emit_clobber (int_xmm);
18710 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18712 else
18714 x = gen_reg_rtx (V2DImode);
18715 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18716 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18719 x = gen_rtx_CONST_VECTOR (V4SImode,
18720 gen_rtvec (4, GEN_INT (0x43300000UL),
18721 GEN_INT (0x45300000UL),
18722 const0_rtx, const0_rtx));
18723 exponents = validize_mem (force_const_mem (V4SImode, x));
18725 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18726 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18728 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18729 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18730 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18731 (0x1.0p84 + double(fp_value_hi_xmm)).
18732 Note these exponents differ by 32. */
18734 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18736 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18737 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18738 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18739 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18740 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18741 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18742 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18743 biases = validize_mem (force_const_mem (V2DFmode, biases));
18744 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18746 /* Add the upper and lower DFmode values together. */
18747 if (TARGET_SSE3)
18748 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18749 else
18751 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18752 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18753 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18756 ix86_expand_vector_extract (false, target, fp_xmm, 0);
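/* For illustration only (not part of the original file): a hypothetical
   scalar helper showing the bias trick described above.  The bit pattern
   0x43300000:lo is the double 0x1.0p52 + lo, and 0x45300000:hi is
   0x1.0p84 + hi * 0x1.0p32; subtracting the biases and adding the halves
   (the single rounding step, like the haddpd/addpd above) yields the value.

     #include <string.h>

     double
     u64_to_double (unsigned long long v)
     {
       unsigned int lo = (unsigned int) v, hi = (unsigned int) (v >> 32);
       unsigned long long blo = ((unsigned long long) 0x43300000 << 32) | lo;
       unsigned long long bhi = ((unsigned long long) 0x45300000 << 32) | hi;
       double dlo, dhi;
       memcpy (&dlo, &blo, sizeof dlo);
       memcpy (&dhi, &bhi, sizeof dhi);
       return (dhi - 0x1.0p84) + (dlo - 0x1.0p52);
     }
*/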
18759 /* Not used, but eases macroization of patterns. */
18760 void
18761 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18762 rtx input ATTRIBUTE_UNUSED)
18764 gcc_unreachable ();
18767 /* Convert an unsigned SImode value into a DFmode. Only currently used
18768 for SSE, but applicable anywhere. */
18770 void
18771 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18773 REAL_VALUE_TYPE TWO31r;
18774 rtx x, fp;
18776 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18777 NULL, 1, OPTAB_DIRECT);
18779 fp = gen_reg_rtx (DFmode);
18780 emit_insn (gen_floatsidf2 (fp, x));
18782 real_ldexp (&TWO31r, &dconst1, 31);
18783 x = const_double_from_real_value (TWO31r, DFmode);
18785 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18786 if (x != target)
18787 emit_move_insn (target, x);
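/* For illustration only (not part of the original file): a scalar model of
   the expansion above, assuming the usual two's-complement wraparound.
   Flipping the sign bit makes the signed value equal to u - 2^31, which
   converts exactly; adding 0x1.0p31 restores u.

     double
     u32_to_double (unsigned int u)
     {
       int biased = (int) (u + 0x80000000u);
       return (double) biased + 0x1.0p31;
     }
*/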
18790 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18791 32-bit mode; otherwise we have a direct convert instruction. */
18793 void
18794 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18796 REAL_VALUE_TYPE TWO32r;
18797 rtx fp_lo, fp_hi, x;
18799 fp_lo = gen_reg_rtx (DFmode);
18800 fp_hi = gen_reg_rtx (DFmode);
18802 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18804 real_ldexp (&TWO32r, &dconst1, 32);
18805 x = const_double_from_real_value (TWO32r, DFmode);
18806 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18808 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18810 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18811 0, OPTAB_DIRECT);
18812 if (x != target)
18813 emit_move_insn (target, x);
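/* For illustration only (not part of the original file): a scalar model of
   the expansion above, assuming an arithmetic right shift.  Both partial
   conversions and the scaling by 2^32 are exact; only the final addition
   rounds, matching the single add emitted here.

     double
     s64_to_double (long long v)
     {
       double hi = (double) (int) (v >> 32);
       double lo = (double) (unsigned int) v;
       return hi * 0x1.0p32 + lo;
     }
*/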
18816 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18817 For x86_32, -mfpmath=sse, !optimize_size only. */
18818 void
18819 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18821 REAL_VALUE_TYPE ONE16r;
18822 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18824 real_ldexp (&ONE16r, &dconst1, 16);
18825 x = const_double_from_real_value (ONE16r, SFmode);
18826 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18827 NULL, 0, OPTAB_DIRECT);
18828 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18829 NULL, 0, OPTAB_DIRECT);
18830 fp_hi = gen_reg_rtx (SFmode);
18831 fp_lo = gen_reg_rtx (SFmode);
18832 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18833 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18834 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18835 0, OPTAB_DIRECT);
18836 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18837 0, OPTAB_DIRECT);
18838 if (!rtx_equal_p (target, fp_hi))
18839 emit_move_insn (target, fp_hi);
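/* For illustration only (not part of the original file): a scalar model of
   the expansion above.  Each 16-bit half converts exactly and the scaling
   by 2^16 is exact, so only the final addition rounds.

     float
     u32_to_float (unsigned int u)
     {
       float hi = (float) (int) (u >> 16);
       float lo = (float) (int) (u & 0xffff);
       return hi * 65536.0f + lo;
     }
*/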
18842 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18843 a vector of unsigned ints VAL to vector of floats TARGET. */
18845 void
18846 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18848 rtx tmp[8];
18849 REAL_VALUE_TYPE TWO16r;
18850 enum machine_mode intmode = GET_MODE (val);
18851 enum machine_mode fltmode = GET_MODE (target);
18852 rtx (*cvt) (rtx, rtx);
18854 if (intmode == V16SImode)
18856 emit_insn (gen_ufloatv16siv16sf2 (target, val));
18857 return;
18859 if (TARGET_AVX512VL)
18861 if (intmode == V4SImode)
18862 emit_insn (gen_ufloatv4siv4sf2 (target, val));
18863 else
18864 emit_insn (gen_ufloatv8siv8sf2 (target, val));
18865 return;
18867 if (intmode == V4SImode)
18868 cvt = gen_floatv4siv4sf2;
18869 else
18870 cvt = gen_floatv8siv8sf2;
18871 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18872 tmp[0] = force_reg (intmode, tmp[0]);
18873 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18874 OPTAB_DIRECT);
18875 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18876 NULL_RTX, 1, OPTAB_DIRECT);
18877 tmp[3] = gen_reg_rtx (fltmode);
18878 emit_insn (cvt (tmp[3], tmp[1]));
18879 tmp[4] = gen_reg_rtx (fltmode);
18880 emit_insn (cvt (tmp[4], tmp[2]));
18881 real_ldexp (&TWO16r, &dconst1, 16);
18882 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18883 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18884 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18885 OPTAB_DIRECT);
18886 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18887 OPTAB_DIRECT);
18888 if (tmp[7] != target)
18889 emit_move_insn (target, tmp[7]);
18892 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18893 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18894 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18895 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18897 rtx
18898 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18900 REAL_VALUE_TYPE TWO31r;
18901 rtx two31r, tmp[4];
18902 enum machine_mode mode = GET_MODE (val);
18903 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18904 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18905 rtx (*cmp) (rtx, rtx, rtx, rtx);
18906 int i;
18908 for (i = 0; i < 3; i++)
18909 tmp[i] = gen_reg_rtx (mode);
18910 real_ldexp (&TWO31r, &dconst1, 31);
18911 two31r = const_double_from_real_value (TWO31r, scalarmode);
18912 two31r = ix86_build_const_vector (mode, 1, two31r);
18913 two31r = force_reg (mode, two31r);
18914 switch (mode)
18916 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18917 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18918 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18919 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18920 default: gcc_unreachable ();
18922 tmp[3] = gen_rtx_LE (mode, two31r, val);
18923 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18924 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18925 0, OPTAB_DIRECT);
18926 if (intmode == V4SImode || TARGET_AVX2)
18927 *xorp = expand_simple_binop (intmode, ASHIFT,
18928 gen_lowpart (intmode, tmp[0]),
18929 GEN_INT (31), NULL_RTX, 0,
18930 OPTAB_DIRECT);
18931 else
18933 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18934 two31 = ix86_build_const_vector (intmode, 1, two31);
18935 *xorp = expand_simple_binop (intmode, AND,
18936 gen_lowpart (intmode, tmp[0]),
18937 two31, NULL_RTX, 0,
18938 OPTAB_DIRECT);
18940 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18941 0, OPTAB_DIRECT);
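/* For illustration only (not part of the original file): a per-element
   scalar model of the adjustment above, which lets the signed cvttps2dq /
   cvttpd2dq instructions perform an unsigned conversion.  The maskcmp
   corresponds to big, the masked constant to adj, and the value stored
   through *XORP to xorv.

     unsigned int
     fp_to_u32 (float x)
     {
       int big = x >= 0x1.0p31f;
       float adj = big ? 0x1.0p31f : 0.0f;
       unsigned int xorv = big ? 0x80000000u : 0u;
       return (unsigned int) (int) (x - adj) ^ xorv;
     }
*/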
18944 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18945 then replicate the value for all elements of the vector
18946 register. */
18948 rtx
18949 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18951 int i, n_elt;
18952 rtvec v;
18953 enum machine_mode scalar_mode;
18955 switch (mode)
18957 case V64QImode:
18958 case V32QImode:
18959 case V16QImode:
18960 case V32HImode:
18961 case V16HImode:
18962 case V8HImode:
18963 case V16SImode:
18964 case V8SImode:
18965 case V4SImode:
18966 case V8DImode:
18967 case V4DImode:
18968 case V2DImode:
18969 gcc_assert (vect);
18970 case V16SFmode:
18971 case V8SFmode:
18972 case V4SFmode:
18973 case V8DFmode:
18974 case V4DFmode:
18975 case V2DFmode:
18976 n_elt = GET_MODE_NUNITS (mode);
18977 v = rtvec_alloc (n_elt);
18978 scalar_mode = GET_MODE_INNER (mode);
18980 RTVEC_ELT (v, 0) = value;
18982 for (i = 1; i < n_elt; ++i)
18983 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18985 return gen_rtx_CONST_VECTOR (mode, v);
18987 default:
18988 gcc_unreachable ();
18992 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18993 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18994 for an SSE register. If VECT is true, then replicate the mask for
18995 all elements of the vector register. If INVERT is true, then create
18996 a mask excluding the sign bit. */
18998 rtx
18999 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
19001 enum machine_mode vec_mode, imode;
19002 HOST_WIDE_INT hi, lo;
19003 int shift = 63;
19004 rtx v;
19005 rtx mask;
19007 /* Find the sign bit, sign extended to 2*HWI. */
19008 switch (mode)
19010 case V16SImode:
19011 case V16SFmode:
19012 case V8SImode:
19013 case V4SImode:
19014 case V8SFmode:
19015 case V4SFmode:
19016 vec_mode = mode;
19017 mode = GET_MODE_INNER (mode);
19018 imode = SImode;
19019 lo = 0x80000000, hi = lo < 0;
19020 break;
19022 case V8DImode:
19023 case V4DImode:
19024 case V2DImode:
19025 case V8DFmode:
19026 case V4DFmode:
19027 case V2DFmode:
19028 vec_mode = mode;
19029 mode = GET_MODE_INNER (mode);
19030 imode = DImode;
19031 if (HOST_BITS_PER_WIDE_INT >= 64)
19032 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19033 else
19034 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19035 break;
19037 case TImode:
19038 case TFmode:
19039 vec_mode = VOIDmode;
19040 if (HOST_BITS_PER_WIDE_INT >= 64)
19042 imode = TImode;
19043 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19045 else
19047 rtvec vec;
19049 imode = DImode;
19050 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19052 if (invert)
19054 lo = ~lo, hi = ~hi;
19055 v = constm1_rtx;
19057 else
19058 v = const0_rtx;
19060 mask = immed_double_const (lo, hi, imode);
19062 vec = gen_rtvec (2, v, mask);
19063 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19064 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19066 return v;
19068 break;
19070 default:
19071 gcc_unreachable ();
19074 if (invert)
19075 lo = ~lo, hi = ~hi;
19077 /* Force this value into the low part of a fp vector constant. */
19078 mask = immed_double_const (lo, hi, imode);
19079 mask = gen_lowpart (mode, mask);
19081 if (vec_mode == VOIDmode)
19082 return force_reg (mode, mask);
19084 v = ix86_build_const_vector (vec_mode, vect, mask);
19085 return force_reg (vec_mode, v);
19088 /* Generate code for floating point ABS or NEG. */
19090 void
19091 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19092 rtx operands[])
19094 rtx mask, set, dst, src;
19095 bool use_sse = false;
19096 bool vector_mode = VECTOR_MODE_P (mode);
19097 enum machine_mode vmode = mode;
19099 if (vector_mode)
19100 use_sse = true;
19101 else if (mode == TFmode)
19102 use_sse = true;
19103 else if (TARGET_SSE_MATH)
19105 use_sse = SSE_FLOAT_MODE_P (mode);
19106 if (mode == SFmode)
19107 vmode = V4SFmode;
19108 else if (mode == DFmode)
19109 vmode = V2DFmode;
19112 /* NEG and ABS performed with SSE use bitwise mask operations.
19113 Create the appropriate mask now. */
19114 if (use_sse)
19115 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19116 else
19117 mask = NULL_RTX;
19119 dst = operands[0];
19120 src = operands[1];
19122 set = gen_rtx_fmt_e (code, mode, src);
19123 set = gen_rtx_SET (VOIDmode, dst, set);
19125 if (mask)
19127 rtx use, clob;
19128 rtvec par;
19130 use = gen_rtx_USE (VOIDmode, mask);
19131 if (vector_mode)
19132 par = gen_rtvec (2, set, use);
19133 else
19135 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19136 par = gen_rtvec (3, set, use, clob);
19138 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19140 else
19141 emit_insn (set);
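/* For illustration only (not part of the original file): the SSE expansion
   above reduces to two bitwise identities on the IEEE bit image, shown
   here for DFmode and applied to every element of the vector register:

     neg:  bits ^= 0x8000000000000000ull;        (xorpd with the sign mask)
     abs:  bits &= 0x7fffffffffffffffull;        (andpd with the inverted mask)
*/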
19144 /* Expand a copysign operation. Special case operand 0 being a constant. */
19146 void
19147 ix86_expand_copysign (rtx operands[])
19149 enum machine_mode mode, vmode;
19150 rtx dest, op0, op1, mask, nmask;
19152 dest = operands[0];
19153 op0 = operands[1];
19154 op1 = operands[2];
19156 mode = GET_MODE (dest);
19158 if (mode == SFmode)
19159 vmode = V4SFmode;
19160 else if (mode == DFmode)
19161 vmode = V2DFmode;
19162 else
19163 vmode = mode;
19165 if (GET_CODE (op0) == CONST_DOUBLE)
19167 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19169 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19170 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19172 if (mode == SFmode || mode == DFmode)
19174 if (op0 == CONST0_RTX (mode))
19175 op0 = CONST0_RTX (vmode);
19176 else
19178 rtx v = ix86_build_const_vector (vmode, false, op0);
19180 op0 = force_reg (vmode, v);
19183 else if (op0 != CONST0_RTX (mode))
19184 op0 = force_reg (mode, op0);
19186 mask = ix86_build_signbit_mask (vmode, 0, 0);
19188 if (mode == SFmode)
19189 copysign_insn = gen_copysignsf3_const;
19190 else if (mode == DFmode)
19191 copysign_insn = gen_copysigndf3_const;
19192 else
19193 copysign_insn = gen_copysigntf3_const;
19195 emit_insn (copysign_insn (dest, op0, op1, mask));
19197 else
19199 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19201 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19202 mask = ix86_build_signbit_mask (vmode, 0, 0);
19204 if (mode == SFmode)
19205 copysign_insn = gen_copysignsf3_var;
19206 else if (mode == DFmode)
19207 copysign_insn = gen_copysigndf3_var;
19208 else
19209 copysign_insn = gen_copysigntf3_var;
19211 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
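/* For illustration only (not part of the original file): with S the
   sign-bit mask, the expansion above computes

     copysign (x, y) = (x & ~S) | (y & S)

   e.g. for DFmode, on the bit images bx and by of x and y:

     result = (bx & 0x7fffffffffffffffull) | (by & 0x8000000000000000ull);

   The "_const" variants fold the first AND away because |x| is already
   known at expansion time.  */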
19215 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19216 be a constant, and so has already been expanded into a vector constant. */
19218 void
19219 ix86_split_copysign_const (rtx operands[])
19221 enum machine_mode mode, vmode;
19222 rtx dest, op0, mask, x;
19224 dest = operands[0];
19225 op0 = operands[1];
19226 mask = operands[3];
19228 mode = GET_MODE (dest);
19229 vmode = GET_MODE (mask);
19231 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19232 x = gen_rtx_AND (vmode, dest, mask);
19233 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19235 if (op0 != CONST0_RTX (vmode))
19237 x = gen_rtx_IOR (vmode, dest, op0);
19238 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19242 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19243 so we have to do two masks. */
19245 void
19246 ix86_split_copysign_var (rtx operands[])
19248 enum machine_mode mode, vmode;
19249 rtx dest, scratch, op0, op1, mask, nmask, x;
19251 dest = operands[0];
19252 scratch = operands[1];
19253 op0 = operands[2];
19254 op1 = operands[3];
19255 nmask = operands[4];
19256 mask = operands[5];
19258 mode = GET_MODE (dest);
19259 vmode = GET_MODE (mask);
19261 if (rtx_equal_p (op0, op1))
19263 /* Shouldn't happen often (it's useless, obviously), but when it does
19264 we'd generate incorrect code if we continue below. */
19265 emit_move_insn (dest, op0);
19266 return;
19269 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19271 gcc_assert (REGNO (op1) == REGNO (scratch));
19273 x = gen_rtx_AND (vmode, scratch, mask);
19274 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19276 dest = mask;
19277 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19278 x = gen_rtx_NOT (vmode, dest);
19279 x = gen_rtx_AND (vmode, x, op0);
19280 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19282 else
19284 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19286 x = gen_rtx_AND (vmode, scratch, mask);
19288 else /* alternative 2,4 */
19290 gcc_assert (REGNO (mask) == REGNO (scratch));
19291 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19292 x = gen_rtx_AND (vmode, scratch, op1);
19294 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19296 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19298 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19299 x = gen_rtx_AND (vmode, dest, nmask);
19301 else /* alternative 3,4 */
19303 gcc_assert (REGNO (nmask) == REGNO (dest));
19304 dest = nmask;
19305 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19306 x = gen_rtx_AND (vmode, dest, op0);
19308 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19311 x = gen_rtx_IOR (vmode, dest, scratch);
19312 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19315 /* Return TRUE or FALSE depending on whether the first SET in INSN
19316 has source and destination with matching CC modes, and that the
19317 CC mode is at least as constrained as REQ_MODE. */
19319 bool
19320 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19322 rtx set;
19323 enum machine_mode set_mode;
19325 set = PATTERN (insn);
19326 if (GET_CODE (set) == PARALLEL)
19327 set = XVECEXP (set, 0, 0);
19328 gcc_assert (GET_CODE (set) == SET);
19329 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19331 set_mode = GET_MODE (SET_DEST (set));
19332 switch (set_mode)
19334 case CCNOmode:
19335 if (req_mode != CCNOmode
19336 && (req_mode != CCmode
19337 || XEXP (SET_SRC (set), 1) != const0_rtx))
19338 return false;
19339 break;
19340 case CCmode:
19341 if (req_mode == CCGCmode)
19342 return false;
19343 /* FALLTHRU */
19344 case CCGCmode:
19345 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19346 return false;
19347 /* FALLTHRU */
19348 case CCGOCmode:
19349 if (req_mode == CCZmode)
19350 return false;
19351 /* FALLTHRU */
19352 case CCZmode:
19353 break;
19355 case CCAmode:
19356 case CCCmode:
19357 case CCOmode:
19358 case CCSmode:
19359 if (set_mode != req_mode)
19360 return false;
19361 break;
19363 default:
19364 gcc_unreachable ();
19367 return GET_MODE (SET_SRC (set)) == set_mode;
19370 /* Generate insn patterns to do an integer compare of OPERANDS. */
19372 static rtx
19373 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19375 enum machine_mode cmpmode;
19376 rtx tmp, flags;
19378 cmpmode = SELECT_CC_MODE (code, op0, op1);
19379 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19381 /* This is very simple, but making the interface the same as in the
19382 FP case makes the rest of the code easier. */
19383 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19384 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19386 /* Return the test that should be put into the flags user, i.e.
19387 the bcc, scc, or cmov instruction. */
19388 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19391 /* Figure out whether to use ordered or unordered fp comparisons.
19392 Return the appropriate mode to use. */
19394 enum machine_mode
19395 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19397 /* ??? In order to make all comparisons reversible, we do all comparisons
19398 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19399 all forms of trapping and nontrapping comparisons, we can make inequality
19400 comparisons trapping again, since that results in better code when using
19401 FCOM based compares. */
19402 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19405 enum machine_mode
19406 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19408 enum machine_mode mode = GET_MODE (op0);
19410 if (SCALAR_FLOAT_MODE_P (mode))
19412 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19413 return ix86_fp_compare_mode (code);
19416 switch (code)
19418 /* Only zero flag is needed. */
19419 case EQ: /* ZF=0 */
19420 case NE: /* ZF!=0 */
19421 return CCZmode;
19422 /* Codes needing carry flag. */
19423 case GEU: /* CF=0 */
19424 case LTU: /* CF=1 */
19425 /* Detect overflow checks. They need just the carry flag. */
19426 if (GET_CODE (op0) == PLUS
19427 && rtx_equal_p (op1, XEXP (op0, 0)))
19428 return CCCmode;
19429 else
19430 return CCmode;
19431 case GTU: /* CF=0 & ZF=0 */
19432 case LEU: /* CF=1 | ZF=1 */
19433 return CCmode;
19434 /* Codes possibly doable only with the sign flag when
19435 comparing against zero. */
19436 case GE: /* SF=OF or SF=0 */
19437 case LT: /* SF<>OF or SF=1 */
19438 if (op1 == const0_rtx)
19439 return CCGOCmode;
19440 else
19442 /* For other cases the carry flag is not required. */
19442 return CCGCmode;
19443 /* Codes doable only with the sign flag when comparing
19444 against zero, but we lack a jump instruction for it,
19445 so we need to use relational tests against overflow,
19446 which thus needs to be zero. */
19447 case GT: /* ZF=0 & SF=OF */
19448 case LE: /* ZF=1 | SF<>OF */
19449 if (op1 == const0_rtx)
19450 return CCNOmode;
19451 else
19452 return CCGCmode;
19453 /* The strcmp pattern does (use flags), and combine may ask us for the
19454 proper mode. */
19455 case USE:
19456 return CCmode;
19457 default:
19458 gcc_unreachable ();
19462 /* Return the fixed registers used for condition codes. */
19464 static bool
19465 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19467 *p1 = FLAGS_REG;
19468 *p2 = FPSR_REG;
19469 return true;
19472 /* If two condition code modes are compatible, return a condition code
19473 mode which is compatible with both. Otherwise, return
19474 VOIDmode. */
19476 static enum machine_mode
19477 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19479 if (m1 == m2)
19480 return m1;
19482 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19483 return VOIDmode;
19485 if ((m1 == CCGCmode && m2 == CCGOCmode)
19486 || (m1 == CCGOCmode && m2 == CCGCmode))
19487 return CCGCmode;
19489 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19490 return m2;
19491 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19492 return m1;
19494 switch (m1)
19496 default:
19497 gcc_unreachable ();
19499 case CCmode:
19500 case CCGCmode:
19501 case CCGOCmode:
19502 case CCNOmode:
19503 case CCAmode:
19504 case CCCmode:
19505 case CCOmode:
19506 case CCSmode:
19507 case CCZmode:
19508 switch (m2)
19510 default:
19511 return VOIDmode;
19513 case CCmode:
19514 case CCGCmode:
19515 case CCGOCmode:
19516 case CCNOmode:
19517 case CCAmode:
19518 case CCCmode:
19519 case CCOmode:
19520 case CCSmode:
19521 case CCZmode:
19522 return CCmode;
19525 case CCFPmode:
19526 case CCFPUmode:
19527 /* These are only compatible with themselves, which we already
19528 checked above. */
19529 return VOIDmode;
19534 /* Return a comparison we can do and that it is equivalent to
19535 swap_condition (code) apart possibly from orderedness.
19536 But, never change orderedness if TARGET_IEEE_FP, returning
19537 UNKNOWN in that case if necessary. */
19539 static enum rtx_code
19540 ix86_fp_swap_condition (enum rtx_code code)
19542 switch (code)
19544 case GT: /* GTU - CF=0 & ZF=0 */
19545 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19546 case GE: /* GEU - CF=0 */
19547 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19548 case UNLT: /* LTU - CF=1 */
19549 return TARGET_IEEE_FP ? UNKNOWN : GT;
19550 case UNLE: /* LEU - CF=1 | ZF=1 */
19551 return TARGET_IEEE_FP ? UNKNOWN : GE;
19552 default:
19553 return swap_condition (code);
19557 /* Return the cost of comparison CODE using the best strategy for performance.
19558 All of the following functions use the number of instructions as a cost metric.
19559 In the future this should be tweaked to compute bytes for optimize_size and
19560 take into account the performance of various instructions on various CPUs. */
19562 static int
19563 ix86_fp_comparison_cost (enum rtx_code code)
19565 int arith_cost;
19567 /* The cost of code using bit-twiddling on %ah. */
19568 switch (code)
19570 case UNLE:
19571 case UNLT:
19572 case LTGT:
19573 case GT:
19574 case GE:
19575 case UNORDERED:
19576 case ORDERED:
19577 case UNEQ:
19578 arith_cost = 4;
19579 break;
19580 case LT:
19581 case NE:
19582 case EQ:
19583 case UNGE:
19584 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19585 break;
19586 case LE:
19587 case UNGT:
19588 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19589 break;
19590 default:
19591 gcc_unreachable ();
19594 switch (ix86_fp_comparison_strategy (code))
19596 case IX86_FPCMP_COMI:
19597 return arith_cost > 4 ? 3 : 2;
19598 case IX86_FPCMP_SAHF:
19599 return arith_cost > 4 ? 4 : 3;
19600 default:
19601 return arith_cost;
19605 /* Return the strategy to use for a floating-point comparison. We assume that
19606 fcomi is always preferable where available, since that is also true when looking
19607 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19609 enum ix86_fpcmp_strategy
19610 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19612 /* Do fcomi/sahf based test when profitable. */
19614 if (TARGET_CMOVE)
19615 return IX86_FPCMP_COMI;
19617 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19618 return IX86_FPCMP_SAHF;
19620 return IX86_FPCMP_ARITH;
19623 /* Swap, force into registers, or otherwise massage the two operands
19624 to a fp comparison. The operands are updated in place; the new
19625 comparison code is returned. */
19627 static enum rtx_code
19628 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19630 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19631 rtx op0 = *pop0, op1 = *pop1;
19632 enum machine_mode op_mode = GET_MODE (op0);
19633 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19635 /* All of the unordered compare instructions only work on registers.
19636 The same is true of the fcomi compare instructions. The XFmode
19637 compare instructions require registers except when comparing
19638 against zero or when converting operand 1 from fixed point to
19639 floating point. */
19641 if (!is_sse
19642 && (fpcmp_mode == CCFPUmode
19643 || (op_mode == XFmode
19644 && ! (standard_80387_constant_p (op0) == 1
19645 || standard_80387_constant_p (op1) == 1)
19646 && GET_CODE (op1) != FLOAT)
19647 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19649 op0 = force_reg (op_mode, op0);
19650 op1 = force_reg (op_mode, op1);
19652 else
19654 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19655 things around if they appear profitable, otherwise force op0
19656 into a register. */
19658 if (standard_80387_constant_p (op0) == 0
19659 || (MEM_P (op0)
19660 && ! (standard_80387_constant_p (op1) == 0
19661 || MEM_P (op1))))
19663 enum rtx_code new_code = ix86_fp_swap_condition (code);
19664 if (new_code != UNKNOWN)
19666 rtx tmp;
19667 tmp = op0, op0 = op1, op1 = tmp;
19668 code = new_code;
19672 if (!REG_P (op0))
19673 op0 = force_reg (op_mode, op0);
19675 if (CONSTANT_P (op1))
19677 int tmp = standard_80387_constant_p (op1);
19678 if (tmp == 0)
19679 op1 = validize_mem (force_const_mem (op_mode, op1));
19680 else if (tmp == 1)
19682 if (TARGET_CMOVE)
19683 op1 = force_reg (op_mode, op1);
19685 else
19686 op1 = force_reg (op_mode, op1);
19690 /* Try to rearrange the comparison to make it cheaper. */
19691 if (ix86_fp_comparison_cost (code)
19692 > ix86_fp_comparison_cost (swap_condition (code))
19693 && (REG_P (op1) || can_create_pseudo_p ()))
19695 rtx tmp;
19696 tmp = op0, op0 = op1, op1 = tmp;
19697 code = swap_condition (code);
19698 if (!REG_P (op0))
19699 op0 = force_reg (op_mode, op0);
19702 *pop0 = op0;
19703 *pop1 = op1;
19704 return code;
19707 /* Convert comparison codes we use to represent FP comparison to integer
19708 code that will result in proper branch. Return UNKNOWN if no such code
19709 is available. */
19711 enum rtx_code
19712 ix86_fp_compare_code_to_integer (enum rtx_code code)
19714 switch (code)
19716 case GT:
19717 return GTU;
19718 case GE:
19719 return GEU;
19720 case ORDERED:
19721 case UNORDERED:
19722 return code;
19723 break;
19724 case UNEQ:
19725 return EQ;
19726 break;
19727 case UNLT:
19728 return LTU;
19729 break;
19730 case UNLE:
19731 return LEU;
19732 break;
19733 case LTGT:
19734 return NE;
19735 break;
19736 default:
19737 return UNKNOWN;
19741 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19743 static rtx
19744 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19746 enum machine_mode fpcmp_mode, intcmp_mode;
19747 rtx tmp, tmp2;
19749 fpcmp_mode = ix86_fp_compare_mode (code);
19750 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19752 /* Do fcomi/sahf based test when profitable. */
19753 switch (ix86_fp_comparison_strategy (code))
19755 case IX86_FPCMP_COMI:
19756 intcmp_mode = fpcmp_mode;
19757 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19758 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19759 tmp);
19760 emit_insn (tmp);
19761 break;
19763 case IX86_FPCMP_SAHF:
19764 intcmp_mode = fpcmp_mode;
19765 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19766 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19767 tmp);
19769 if (!scratch)
19770 scratch = gen_reg_rtx (HImode);
19771 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19772 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19773 break;
19775 case IX86_FPCMP_ARITH:
19776 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19777 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19778 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19779 if (!scratch)
19780 scratch = gen_reg_rtx (HImode);
19781 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19783 /* In the unordered case, we have to check C2 for NaNs, which
19784 doesn't happen to work out to anything nice combination-wise.
19785 So do some bit twiddling on the value we've got in AH to come
19786 up with an appropriate set of condition codes. */
19788 intcmp_mode = CCNOmode;
19789 switch (code)
19791 case GT:
19792 case UNGT:
19793 if (code == GT || !TARGET_IEEE_FP)
19795 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19796 code = EQ;
19798 else
19800 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19801 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19802 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19803 intcmp_mode = CCmode;
19804 code = GEU;
19806 break;
19807 case LT:
19808 case UNLT:
19809 if (code == LT && TARGET_IEEE_FP)
19811 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19812 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19813 intcmp_mode = CCmode;
19814 code = EQ;
19816 else
19818 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19819 code = NE;
19821 break;
19822 case GE:
19823 case UNGE:
19824 if (code == GE || !TARGET_IEEE_FP)
19826 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19827 code = EQ;
19829 else
19831 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19832 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19833 code = NE;
19835 break;
19836 case LE:
19837 case UNLE:
19838 if (code == LE && TARGET_IEEE_FP)
19840 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19841 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19842 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19843 intcmp_mode = CCmode;
19844 code = LTU;
19846 else
19848 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19849 code = NE;
19851 break;
19852 case EQ:
19853 case UNEQ:
19854 if (code == EQ && TARGET_IEEE_FP)
19856 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19857 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19858 intcmp_mode = CCmode;
19859 code = EQ;
19861 else
19863 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19864 code = NE;
19866 break;
19867 case NE:
19868 case LTGT:
19869 if (code == NE && TARGET_IEEE_FP)
19871 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19872 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19873 GEN_INT (0x40)));
19874 code = NE;
19876 else
19878 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19879 code = EQ;
19881 break;
19883 case UNORDERED:
19884 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19885 code = NE;
19886 break;
19887 case ORDERED:
19888 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19889 code = EQ;
19890 break;
19892 default:
19893 gcc_unreachable ();
19895 break;
19897 default:
19898 gcc_unreachable();
19901 /* Return the test that should be put into the flags user, i.e.
19902 the bcc, scc, or cmov instruction. */
19903 return gen_rtx_fmt_ee (code, VOIDmode,
19904 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19905 const0_rtx);
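/* For reference (not part of the original file): after fnstsw the FPU
   condition bits land in AH as C0 = 0x01, C2 = 0x04 and C3 = 0x40, so the
   masks used in the IX86_FPCMP_ARITH case above decode as

     0x45 = C0|C2|C3    0x44 = C2|C3    0x05 = C0|C2
     0x40 = C3          0x04 = C2       0x01 = C0

   For example, the GT case tests (AH & 0x45) == 0, i.e. the comparison
   was neither below, nor equal, nor unordered.  */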
19908 static rtx
19909 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19911 rtx ret;
19913 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19914 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19916 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19918 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19919 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19921 else
19922 ret = ix86_expand_int_compare (code, op0, op1);
19924 return ret;
19927 void
19928 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19930 enum machine_mode mode = GET_MODE (op0);
19931 rtx tmp;
19933 switch (mode)
19935 case SFmode:
19936 case DFmode:
19937 case XFmode:
19938 case QImode:
19939 case HImode:
19940 case SImode:
19941 simple:
19942 tmp = ix86_expand_compare (code, op0, op1);
19943 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19944 gen_rtx_LABEL_REF (VOIDmode, label),
19945 pc_rtx);
19946 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19947 return;
19949 case DImode:
19950 if (TARGET_64BIT)
19951 goto simple;
19952 case TImode:
19953 /* Expand DImode branch into multiple compare+branch. */
19955 rtx lo[2], hi[2], label2;
19956 enum rtx_code code1, code2, code3;
19957 enum machine_mode submode;
19959 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19961 tmp = op0, op0 = op1, op1 = tmp;
19962 code = swap_condition (code);
19965 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19966 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19968 submode = mode == DImode ? SImode : DImode;
19970 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19971 avoid two branches. This costs one extra insn, so disable when
19972 optimizing for size. */
19974 if ((code == EQ || code == NE)
19975 && (!optimize_insn_for_size_p ()
19976 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19978 rtx xor0, xor1;
19980 xor1 = hi[0];
19981 if (hi[1] != const0_rtx)
19982 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19983 NULL_RTX, 0, OPTAB_WIDEN);
19985 xor0 = lo[0];
19986 if (lo[1] != const0_rtx)
19987 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19988 NULL_RTX, 0, OPTAB_WIDEN);
19990 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19991 NULL_RTX, 0, OPTAB_WIDEN);
19993 ix86_expand_branch (code, tmp, const0_rtx, label);
19994 return;
19997 /* Otherwise, if we are doing a less-than or greater-than-or-equal comparison,
19998 op1 is a constant and the low word is zero, then we can just
19999 examine the high word. Similarly for a low word of -1 and
20000 less-than-or-equal or greater-than. */
20002 if (CONST_INT_P (hi[1]))
20003 switch (code)
20005 case LT: case LTU: case GE: case GEU:
20006 if (lo[1] == const0_rtx)
20008 ix86_expand_branch (code, hi[0], hi[1], label);
20009 return;
20011 break;
20012 case LE: case LEU: case GT: case GTU:
20013 if (lo[1] == constm1_rtx)
20015 ix86_expand_branch (code, hi[0], hi[1], label);
20016 return;
20018 break;
20019 default:
20020 break;
20023 /* Otherwise, we need two or three jumps. */
20025 label2 = gen_label_rtx ();
20027 code1 = code;
20028 code2 = swap_condition (code);
20029 code3 = unsigned_condition (code);
20031 switch (code)
20033 case LT: case GT: case LTU: case GTU:
20034 break;
20036 case LE: code1 = LT; code2 = GT; break;
20037 case GE: code1 = GT; code2 = LT; break;
20038 case LEU: code1 = LTU; code2 = GTU; break;
20039 case GEU: code1 = GTU; code2 = LTU; break;
20041 case EQ: code1 = UNKNOWN; code2 = NE; break;
20042 case NE: code2 = UNKNOWN; break;
20044 default:
20045 gcc_unreachable ();
20049 * a < b =>
20050 * if (hi(a) < hi(b)) goto true;
20051 * if (hi(a) > hi(b)) goto false;
20052 * if (lo(a) < lo(b)) goto true;
20053 * false:
20056 if (code1 != UNKNOWN)
20057 ix86_expand_branch (code1, hi[0], hi[1], label);
20058 if (code2 != UNKNOWN)
20059 ix86_expand_branch (code2, hi[0], hi[1], label2);
20061 ix86_expand_branch (code3, lo[0], lo[1], label);
20063 if (code2 != UNKNOWN)
20064 emit_label (label2);
20065 return;
20068 default:
20069 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20070 goto simple;
20074 /* Split branch based on floating point condition. */
20075 void
20076 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20077 rtx target1, rtx target2, rtx tmp)
20079 rtx condition;
20080 rtx i;
20082 if (target2 != pc_rtx)
20084 rtx tmp = target2;
20085 code = reverse_condition_maybe_unordered (code);
20086 target2 = target1;
20087 target1 = tmp;
20090 condition = ix86_expand_fp_compare (code, op1, op2,
20091 tmp);
20093 i = emit_jump_insn (gen_rtx_SET
20094 (VOIDmode, pc_rtx,
20095 gen_rtx_IF_THEN_ELSE (VOIDmode,
20096 condition, target1, target2)));
20097 if (split_branch_probability >= 0)
20098 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20101 void
20102 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20104 rtx ret;
20106 gcc_assert (GET_MODE (dest) == QImode);
20108 ret = ix86_expand_compare (code, op0, op1);
20109 PUT_MODE (ret, QImode);
20110 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20113 /* Expand comparison setting or clearing carry flag. Return true when
20114 successful and set pop for the operation. */
20115 static bool
20116 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20118 enum machine_mode mode =
20119 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20121 /* Do not handle double-mode compares that go through the special path. */
20122 if (mode == (TARGET_64BIT ? TImode : DImode))
20123 return false;
20125 if (SCALAR_FLOAT_MODE_P (mode))
20127 rtx compare_op, compare_seq;
20129 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20131 /* Shortcut: the following common codes never translate
20132 into carry flag compares. */
20133 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20134 || code == ORDERED || code == UNORDERED)
20135 return false;
20137 /* These comparisons require the zero flag; swap the operands so they won't. */
20138 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20139 && !TARGET_IEEE_FP)
20141 rtx tmp = op0;
20142 op0 = op1;
20143 op1 = tmp;
20144 code = swap_condition (code);
20147 /* Try to expand the comparison and verify that we end up with
20148 a carry-flag-based comparison. This fails only when we decide
20149 to expand the comparison using arithmetic, which is not a
20150 common scenario. */
20151 start_sequence ();
20152 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20153 compare_seq = get_insns ();
20154 end_sequence ();
20156 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20157 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20158 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20159 else
20160 code = GET_CODE (compare_op);
20162 if (code != LTU && code != GEU)
20163 return false;
20165 emit_insn (compare_seq);
20166 *pop = compare_op;
20167 return true;
20170 if (!INTEGRAL_MODE_P (mode))
20171 return false;
20173 switch (code)
20175 case LTU:
20176 case GEU:
20177 break;
20179 /* Convert a==0 into (unsigned)a<1. */
20180 case EQ:
20181 case NE:
20182 if (op1 != const0_rtx)
20183 return false;
20184 op1 = const1_rtx;
20185 code = (code == EQ ? LTU : GEU);
20186 break;
20188 /* Convert a>b into b<a or a>=b-1. */
20189 case GTU:
20190 case LEU:
20191 if (CONST_INT_P (op1))
20193 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20194 /* Bail out on overflow. We can still swap the operands, but that
20195 would force loading of the constant into a register. */
20196 if (op1 == const0_rtx
20197 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20198 return false;
20199 code = (code == GTU ? GEU : LTU);
20201 else
20203 rtx tmp = op1;
20204 op1 = op0;
20205 op0 = tmp;
20206 code = (code == GTU ? LTU : GEU);
20208 break;
20210 /* Convert a>=0 into (unsigned)a<0x80000000. */
20211 case LT:
20212 case GE:
20213 if (mode == DImode || op1 != const0_rtx)
20214 return false;
20215 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20216 code = (code == LT ? GEU : LTU);
20217 break;
20218 case LE:
20219 case GT:
20220 if (mode == DImode || op1 != constm1_rtx)
20221 return false;
20222 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20223 code = (code == LE ? GEU : LTU);
20224 break;
20226 default:
20227 return false;
20229 /* Swapping the operands may cause a constant to appear as the first operand. */
20230 if (!nonimmediate_operand (op0, VOIDmode))
20232 if (!can_create_pseudo_p ())
20233 return false;
20234 op0 = force_reg (mode, op0);
20236 *pop = ix86_expand_compare (code, op0, op1);
20237 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20238 return true;
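/* For illustration only (not part of the original file): a summary of the
   integer rewrites performed above, each of which is decidable from the
   carry flag alone (LTU/GEU), so the caller can feed the result straight
   into sbb/adc sequences (SImode shown; the 0x80000000 constant is the
   sign bit of the mode in general):

     a == 0    ->  (unsigned) a <  1
     a != 0    ->  (unsigned) a >= 1
     a >  b    ->  (unsigned) a >= b + 1        (b a constant, no overflow)
     a >  b    ->  (unsigned) b <  a            (b not a constant)
     a >= 0    ->  (unsigned) a <  0x80000000
     a <= -1   ->  (unsigned) a >= 0x80000000
*/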
20241 bool
20242 ix86_expand_int_movcc (rtx operands[])
20244 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20245 rtx compare_seq, compare_op;
20246 enum machine_mode mode = GET_MODE (operands[0]);
20247 bool sign_bit_compare_p = false;
20248 rtx op0 = XEXP (operands[1], 0);
20249 rtx op1 = XEXP (operands[1], 1);
20251 if (GET_MODE (op0) == TImode
20252 || (GET_MODE (op0) == DImode
20253 && !TARGET_64BIT))
20254 return false;
20256 start_sequence ();
20257 compare_op = ix86_expand_compare (code, op0, op1);
20258 compare_seq = get_insns ();
20259 end_sequence ();
20261 compare_code = GET_CODE (compare_op);
20263 if ((op1 == const0_rtx && (code == GE || code == LT))
20264 || (op1 == constm1_rtx && (code == GT || code == LE)))
20265 sign_bit_compare_p = true;
20267 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20268 HImode insns, we'd be swallowed in word prefix ops. */
20270 if ((mode != HImode || TARGET_FAST_PREFIX)
20271 && (mode != (TARGET_64BIT ? TImode : DImode))
20272 && CONST_INT_P (operands[2])
20273 && CONST_INT_P (operands[3]))
20275 rtx out = operands[0];
20276 HOST_WIDE_INT ct = INTVAL (operands[2]);
20277 HOST_WIDE_INT cf = INTVAL (operands[3]);
20278 HOST_WIDE_INT diff;
20280 diff = ct - cf;
20281 /* Sign bit compares are better done using shifts than by using
20282 sbb. */
20283 if (sign_bit_compare_p
20284 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20286 /* Detect overlap between destination and compare sources. */
20287 rtx tmp = out;
20289 if (!sign_bit_compare_p)
20291 rtx flags;
20292 bool fpcmp = false;
20294 compare_code = GET_CODE (compare_op);
20296 flags = XEXP (compare_op, 0);
20298 if (GET_MODE (flags) == CCFPmode
20299 || GET_MODE (flags) == CCFPUmode)
20301 fpcmp = true;
20302 compare_code
20303 = ix86_fp_compare_code_to_integer (compare_code);
20306 /* To simplify the rest of the code, restrict to the GEU case. */
20307 if (compare_code == LTU)
20309 HOST_WIDE_INT tmp = ct;
20310 ct = cf;
20311 cf = tmp;
20312 compare_code = reverse_condition (compare_code);
20313 code = reverse_condition (code);
20315 else
20317 if (fpcmp)
20318 PUT_CODE (compare_op,
20319 reverse_condition_maybe_unordered
20320 (GET_CODE (compare_op)));
20321 else
20322 PUT_CODE (compare_op,
20323 reverse_condition (GET_CODE (compare_op)));
20325 diff = ct - cf;
20327 if (reg_overlap_mentioned_p (out, op0)
20328 || reg_overlap_mentioned_p (out, op1))
20329 tmp = gen_reg_rtx (mode);
20331 if (mode == DImode)
20332 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20333 else
20334 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20335 flags, compare_op));
20337 else
20339 if (code == GT || code == GE)
20340 code = reverse_condition (code);
20341 else
20343 HOST_WIDE_INT tmp = ct;
20344 ct = cf;
20345 cf = tmp;
20346 diff = ct - cf;
20348 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20351 if (diff == 1)
20354 * cmpl op0,op1
20355 * sbbl dest,dest
20356 * [addl dest, ct]
20358 * Size 5 - 8.
20360 if (ct)
20361 tmp = expand_simple_binop (mode, PLUS,
20362 tmp, GEN_INT (ct),
20363 copy_rtx (tmp), 1, OPTAB_DIRECT);
20365 else if (cf == -1)
20368 * cmpl op0,op1
20369 * sbbl dest,dest
20370 * orl $ct, dest
20372 * Size 8.
20374 tmp = expand_simple_binop (mode, IOR,
20375 tmp, GEN_INT (ct),
20376 copy_rtx (tmp), 1, OPTAB_DIRECT);
20378 else if (diff == -1 && ct)
20381 * cmpl op0,op1
20382 * sbbl dest,dest
20383 * notl dest
20384 * [addl dest, cf]
20386 * Size 8 - 11.
20388 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20389 if (cf)
20390 tmp = expand_simple_binop (mode, PLUS,
20391 copy_rtx (tmp), GEN_INT (cf),
20392 copy_rtx (tmp), 1, OPTAB_DIRECT);
20394 else
20397 * cmpl op0,op1
20398 * sbbl dest,dest
20399 * [notl dest]
20400 * andl cf - ct, dest
20401 * [addl dest, ct]
20403 * Size 8 - 11.
20406 if (cf == 0)
20408 cf = ct;
20409 ct = 0;
20410 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20413 tmp = expand_simple_binop (mode, AND,
20414 copy_rtx (tmp),
20415 gen_int_mode (cf - ct, mode),
20416 copy_rtx (tmp), 1, OPTAB_DIRECT);
20417 if (ct)
20418 tmp = expand_simple_binop (mode, PLUS,
20419 copy_rtx (tmp), GEN_INT (ct),
20420 copy_rtx (tmp), 1, OPTAB_DIRECT);
20423 if (!rtx_equal_p (tmp, out))
20424 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20426 return true;
20429 if (diff < 0)
20431 enum machine_mode cmp_mode = GET_MODE (op0);
20433 HOST_WIDE_INT tmp;
20434 tmp = ct, ct = cf, cf = tmp;
20435 diff = -diff;
20437 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20439 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20441 /* We may be reversing an unordered compare to a normal compare, which
20442 is not valid in general (we may convert a non-trapping condition
20443 to a trapping one); however, on i386 we currently emit all
20444 comparisons unordered. */
20445 compare_code = reverse_condition_maybe_unordered (compare_code);
20446 code = reverse_condition_maybe_unordered (code);
20448 else
20450 compare_code = reverse_condition (compare_code);
20451 code = reverse_condition (code);
20455 compare_code = UNKNOWN;
20456 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20457 && CONST_INT_P (op1))
20459 if (op1 == const0_rtx
20460 && (code == LT || code == GE))
20461 compare_code = code;
20462 else if (op1 == constm1_rtx)
20464 if (code == LE)
20465 compare_code = LT;
20466 else if (code == GT)
20467 compare_code = GE;
20471 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20472 if (compare_code != UNKNOWN
20473 && GET_MODE (op0) == GET_MODE (out)
20474 && (cf == -1 || ct == -1))
20476 /* If the lea code below could be used, only optimize
20477 if it results in a 2-insn sequence. */
20479 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20480 || diff == 3 || diff == 5 || diff == 9)
20481 || (compare_code == LT && ct == -1)
20482 || (compare_code == GE && cf == -1))
20485 * notl op1 (if necessary)
20486 * sarl $31, op1
20487 * orl cf, op1
20489 if (ct != -1)
20491 cf = ct;
20492 ct = -1;
20493 code = reverse_condition (code);
20496 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20498 out = expand_simple_binop (mode, IOR,
20499 out, GEN_INT (cf),
20500 out, 1, OPTAB_DIRECT);
20501 if (out != operands[0])
20502 emit_move_insn (operands[0], out);
20504 return true;
20509 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20510 || diff == 3 || diff == 5 || diff == 9)
20511 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20512 && (mode != DImode
20513 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20516 * xorl dest,dest
20517 * cmpl op1,op2
20518 * setcc dest
20519 * lea cf(dest*(ct-cf)),dest
20521 * Size 14.
20523 * This also catches the degenerate setcc-only case.
20526 rtx tmp;
20527 int nops;
20529 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20531 nops = 0;
20532 /* On x86_64 the lea instruction operates on Pmode, so we need
20533 the arithmetic done in the proper mode to match. */
20534 if (diff == 1)
20535 tmp = copy_rtx (out);
20536 else
20538 rtx out1;
20539 out1 = copy_rtx (out);
20540 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20541 nops++;
20542 if (diff & 1)
20544 tmp = gen_rtx_PLUS (mode, tmp, out1);
20545 nops++;
20548 if (cf != 0)
20550 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20551 nops++;
20553 if (!rtx_equal_p (tmp, out))
20555 if (nops == 1)
20556 out = force_operand (tmp, copy_rtx (out));
20557 else
20558 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20560 if (!rtx_equal_p (out, operands[0]))
20561 emit_move_insn (operands[0], copy_rtx (out));
20563 return true;
20567 * General case: Jumpful:
20568 * xorl dest,dest cmpl op1, op2
20569 * cmpl op1, op2 movl ct, dest
20570 * setcc dest jcc 1f
20571 * decl dest movl cf, dest
20572 * andl (cf-ct),dest 1:
20573 * addl ct,dest
20575 * Size 20. Size 14.
20577 * This is reasonably steep, but branch mispredict costs are
20578 * high on modern cpus, so consider failing only if optimizing
20579 * for space.
20582 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20583 && BRANCH_COST (optimize_insn_for_speed_p (),
20584 false) >= 2)
20586 if (cf == 0)
20588 enum machine_mode cmp_mode = GET_MODE (op0);
20590 cf = ct;
20591 ct = 0;
20593 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20595 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20597 /* We may be reversing an unordered compare to a normal compare,
20598 which is not valid in general (we may convert a non-trapping
20599 condition to a trapping one); however, on i386 we currently
20600 emit all comparisons unordered. */
20601 code = reverse_condition_maybe_unordered (code);
20603 else
20605 code = reverse_condition (code);
20606 if (compare_code != UNKNOWN)
20607 compare_code = reverse_condition (compare_code);
20611 if (compare_code != UNKNOWN)
20613 /* notl op1 (if needed)
20614 sarl $31, op1
20615 andl (cf-ct), op1
20616 addl ct, op1
20618 For x < 0 (resp. x <= -1) there will be no notl,
20619 so if possible swap the constants to get rid of the
20620 complement.
20621 True/false will be -1/0 while code below (store flag
20622 followed by decrement) is 0/-1, so the constants need
20623 to be exchanged once more. */
20625 if (compare_code == GE || !cf)
20627 code = reverse_condition (code);
20628 compare_code = LT;
20630 else
20632 HOST_WIDE_INT tmp = cf;
20633 cf = ct;
20634 ct = tmp;
20637 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20639 else
20641 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20643 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20644 constm1_rtx,
20645 copy_rtx (out), 1, OPTAB_DIRECT);
20648 out = expand_simple_binop (mode, AND, copy_rtx (out),
20649 gen_int_mode (cf - ct, mode),
20650 copy_rtx (out), 1, OPTAB_DIRECT);
20651 if (ct)
20652 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20653 copy_rtx (out), 1, OPTAB_DIRECT);
20654 if (!rtx_equal_p (out, operands[0]))
20655 emit_move_insn (operands[0], copy_rtx (out));
20657 return true;
20661 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20663 /* Try a few things more with specific constants and a variable. */
20665 optab op;
20666 rtx var, orig_out, out, tmp;
20668 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20669 return false;
20671 /* If one of the two operands is an interesting constant, load a
20672 constant with the above and mask it in with a logical operation. */
20674 if (CONST_INT_P (operands[2]))
20676 var = operands[3];
20677 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20678 operands[3] = constm1_rtx, op = and_optab;
20679 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20680 operands[3] = const0_rtx, op = ior_optab;
20681 else
20682 return false;
20684 else if (CONST_INT_P (operands[3]))
20686 var = operands[2];
20687 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20688 operands[2] = constm1_rtx, op = and_optab;
20689 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
20690 operands[2] = const0_rtx, op = ior_optab;
20691 else
20692 return false;
20694 else
20695 return false;
20697 orig_out = operands[0];
20698 tmp = gen_reg_rtx (mode);
20699 operands[0] = tmp;
20701 /* Recurse to get the constant loaded. */
20702 if (ix86_expand_int_movcc (operands) == 0)
20703 return false;
20705 /* Mask in the interesting variable. */
20706 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20707 OPTAB_WIDEN);
20708 if (!rtx_equal_p (out, orig_out))
20709 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20711 return true;
20715 * For comparison with above,
20717 * movl cf,dest
20718 * movl ct,tmp
20719 * cmpl op1,op2
20720 * cmovcc tmp,dest
20722 * Size 15.
20725 if (! nonimmediate_operand (operands[2], mode))
20726 operands[2] = force_reg (mode, operands[2]);
20727 if (! nonimmediate_operand (operands[3], mode))
20728 operands[3] = force_reg (mode, operands[3]);
20730 if (! register_operand (operands[2], VOIDmode)
20731 && (mode == QImode
20732 || ! register_operand (operands[3], VOIDmode)))
20733 operands[2] = force_reg (mode, operands[2]);
20735 if (mode == QImode
20736 && ! register_operand (operands[3], VOIDmode))
20737 operands[3] = force_reg (mode, operands[3]);
20739 emit_insn (compare_seq);
20740 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20741 gen_rtx_IF_THEN_ELSE (mode,
20742 compare_op, operands[2],
20743 operands[3])));
20744 return true;
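/* For illustration only (not part of the original file): one common
   arrangement of the sbb-based branchless select emitted above for
   constant arms ct/cf; the exact insn choice depends on ct, cf and diff,
   with the diff == 1, cf == -1 and lea-encodable cases getting shorter
   special-case sequences.

     unsigned int
     select (unsigned int a, unsigned int b, unsigned int ct, unsigned int cf)
     {
       unsigned int mask = a < b ? -1u : 0u;
       return ((ct - cf) & mask) + cf;
     }

   Here "a < b ? -1u : 0u" models the cmp/sbb pair, and the and/add pair
   selects ct when the condition holds and cf otherwise.  */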
20747 /* Swap, force into registers, or otherwise massage the two operands
20748 to an sse comparison with a mask result. Thus we differ a bit from
20749 ix86_prepare_fp_compare_args which expects to produce a flags result.
20751 The DEST operand exists to help determine whether to commute commutative
20752 operators. The POP0/POP1 operands are updated in place. The new
20753 comparison code is returned, or UNKNOWN if not implementable. */
20755 static enum rtx_code
20756 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20757 rtx *pop0, rtx *pop1)
20759 rtx tmp;
20761 switch (code)
20763 case LTGT:
20764 case UNEQ:
20765 /* AVX supports all the needed comparisons. */
20766 if (TARGET_AVX)
20767 break;
20768 /* We have no LTGT as an operator. We could implement it with
20769 NE & ORDERED, but this requires an extra temporary. It's
20770 not clear that it's worth it. */
20771 return UNKNOWN;
20773 case LT:
20774 case LE:
20775 case UNGT:
20776 case UNGE:
20777 /* These are supported directly. */
20778 break;
20780 case EQ:
20781 case NE:
20782 case UNORDERED:
20783 case ORDERED:
20784 /* AVX has 3 operand comparisons, no need to swap anything. */
20785 if (TARGET_AVX)
20786 break;
20787 /* For commutative operators, try to canonicalize the destination
20788 operand to be first in the comparison - this helps reload to
20789 avoid extra moves. */
20790 if (!dest || !rtx_equal_p (dest, *pop1))
20791 break;
20792 /* FALLTHRU */
20794 case GE:
20795 case GT:
20796 case UNLE:
20797 case UNLT:
20798 /* These are not supported directly before AVX, and furthermore
20799 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20800 comparison operands to transform into something that is
20801 supported. */
20802 tmp = *pop0;
20803 *pop0 = *pop1;
20804 *pop1 = tmp;
20805 code = swap_condition (code);
20806 break;
20808 default:
20809 gcc_unreachable ();
20812 return code;
20815 /* Detect conditional moves that exactly match min/max operational
20816 semantics. Note that this is IEEE safe, as long as we don't
20817 interchange the operands.
20819 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20820 and TRUE if the operation is successful and instructions are emitted. */
20822 static bool
20823 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20824 rtx cmp_op1, rtx if_true, rtx if_false)
20826 enum machine_mode mode;
20827 bool is_min;
20828 rtx tmp;
20830 if (code == LT)
20832 else if (code == UNGE)
20834 tmp = if_true;
20835 if_true = if_false;
20836 if_false = tmp;
20838 else
20839 return false;
20841 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20842 is_min = true;
20843 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20844 is_min = false;
20845 else
20846 return false;
20848 mode = GET_MODE (dest);
20850 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20851 but MODE may be a vector mode and thus not appropriate. */
20852 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20854 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20855 rtvec v;
20857 if_true = force_reg (mode, if_true);
20858 v = gen_rtvec (2, if_true, if_false);
20859 tmp = gen_rtx_UNSPEC (mode, v, u);
20861 else
20863 code = is_min ? SMIN : SMAX;
20864 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20867 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20868 return true;
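/* Illustrative aside (not part of GCC): a scalar model of the SSE MINSS-style
   semantics the min/max detection above relies on.  The hardware returns its
   second operand when the comparison is unordered (NaN) or when the operands
   compare equal as +0.0/-0.0, which is why the expander must not swap
   IF_TRUE/IF_FALSE.  Function name is hypothetical.  */
static double
sketch_sse_min_semantics (double a, double b)
{
  /* Exactly "a < b ? a : b": the NaN and signed-zero cases fall through
     to B, matching the instruction.  */
  return a < b ? a : b;
}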
20871 /* Expand an sse vector comparison. Return the register with the result. */
20873 static rtx
20874 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20875 rtx op_true, rtx op_false)
20877 enum machine_mode mode = GET_MODE (dest);
20878 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20880 /* In the general case the result of the comparison can have a different mode than the operands. */
20881 enum machine_mode cmp_mode;
20883 /* In AVX512F the result of comparison is an integer mask. */
20884 bool maskcmp = false;
20885 rtx x;
20887 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20889 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20890 gcc_assert (cmp_mode != BLKmode);
20892 maskcmp = true;
20894 else
20895 cmp_mode = cmp_ops_mode;
20898 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20899 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20900 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20902 if (optimize
20903 || reg_overlap_mentioned_p (dest, op_true)
20904 || reg_overlap_mentioned_p (dest, op_false))
20905 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20907 /* For AVX512F integer modes the compare patterns are unspecs, so use the named generators directly. */
20908 if (maskcmp && (code == GT || code == EQ))
20910 rtx (*gen)(rtx, rtx, rtx);
20912 switch (cmp_ops_mode)
20914 case V16SImode:
20915 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20916 break;
20917 case V8DImode:
20918 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20919 break;
20920 default:
20921 gen = NULL;
20924 if (gen)
20926 emit_insn (gen (dest, cmp_op0, cmp_op1));
20927 return dest;
20930 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20932 if (cmp_mode != mode && !maskcmp)
20934 x = force_reg (cmp_ops_mode, x);
20935 convert_move (dest, x, false);
20937 else
20938 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20940 return dest;
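/* Illustrative aside (not part of GCC): with AVX-512F the result of a vector
   comparison is not another vector but an integer mask with one bit per
   element, which is what the cmp_mode computed above (a MODE_INT mode with
   GET_MODE_NUNITS bits) models.  A scalar sketch with hypothetical names:  */
static unsigned short
sketch_avx512_cmpgt_mask (const int *a, const int *b)
{
  unsigned short mask = 0;
  int i;
  /* 16 elements -> 16 mask bits; bit i is set when a[i] > b[i].  */
  for (i = 0; i < 16; i++)
    if (a[i] > b[i])
      mask |= (unsigned short) (1u << i);
  return mask;
}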
20943 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20944 operations. This is used for both scalar and vector conditional moves. */
20946 static void
20947 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20949 enum machine_mode mode = GET_MODE (dest);
20950 enum machine_mode cmpmode = GET_MODE (cmp);
20952 /* In AVX512F the result of comparison is an integer mask. */
20953 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20955 rtx t2, t3, x;
20957 if (vector_all_ones_operand (op_true, mode)
20958 && rtx_equal_p (op_false, CONST0_RTX (mode))
20959 && !maskcmp)
20961 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20963 else if (op_false == CONST0_RTX (mode)
20964 && !maskcmp)
20966 op_true = force_reg (mode, op_true);
20967 x = gen_rtx_AND (mode, cmp, op_true);
20968 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20970 else if (op_true == CONST0_RTX (mode)
20971 && !maskcmp)
20973 op_false = force_reg (mode, op_false);
20974 x = gen_rtx_NOT (mode, cmp);
20975 x = gen_rtx_AND (mode, x, op_false);
20976 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20978 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20979 && !maskcmp)
20981 op_false = force_reg (mode, op_false);
20982 x = gen_rtx_IOR (mode, cmp, op_false);
20983 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20985 else if (TARGET_XOP
20986 && !maskcmp)
20988 op_true = force_reg (mode, op_true);
20990 if (!nonimmediate_operand (op_false, mode))
20991 op_false = force_reg (mode, op_false);
20993 emit_insn (gen_rtx_SET (mode, dest,
20994 gen_rtx_IF_THEN_ELSE (mode, cmp,
20995 op_true,
20996 op_false)));
20998 else
21000 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21001 rtx d = dest;
21003 if (!nonimmediate_operand (op_true, mode))
21004 op_true = force_reg (mode, op_true);
21006 op_false = force_reg (mode, op_false);
21008 switch (mode)
21010 case V4SFmode:
21011 if (TARGET_SSE4_1)
21012 gen = gen_sse4_1_blendvps;
21013 break;
21014 case V2DFmode:
21015 if (TARGET_SSE4_1)
21016 gen = gen_sse4_1_blendvpd;
21017 break;
21018 case V16QImode:
21019 case V8HImode:
21020 case V4SImode:
21021 case V2DImode:
21022 if (TARGET_SSE4_1)
21024 gen = gen_sse4_1_pblendvb;
21025 if (mode != V16QImode)
21026 d = gen_reg_rtx (V16QImode);
21027 op_false = gen_lowpart (V16QImode, op_false);
21028 op_true = gen_lowpart (V16QImode, op_true);
21029 cmp = gen_lowpart (V16QImode, cmp);
21031 break;
21032 case V8SFmode:
21033 if (TARGET_AVX)
21034 gen = gen_avx_blendvps256;
21035 break;
21036 case V4DFmode:
21037 if (TARGET_AVX)
21038 gen = gen_avx_blendvpd256;
21039 break;
21040 case V32QImode:
21041 case V16HImode:
21042 case V8SImode:
21043 case V4DImode:
21044 if (TARGET_AVX2)
21046 gen = gen_avx2_pblendvb;
21047 if (mode != V32QImode)
21048 d = gen_reg_rtx (V32QImode);
21049 op_false = gen_lowpart (V32QImode, op_false);
21050 op_true = gen_lowpart (V32QImode, op_true);
21051 cmp = gen_lowpart (V32QImode, cmp);
21053 break;
21055 case V64QImode:
21056 gen = gen_avx512bw_blendmv64qi;
21057 break;
21058 case V32HImode:
21059 gen = gen_avx512bw_blendmv32hi;
21060 break;
21061 case V16SImode:
21062 gen = gen_avx512f_blendmv16si;
21063 break;
21064 case V8DImode:
21065 gen = gen_avx512f_blendmv8di;
21066 break;
21067 case V8DFmode:
21068 gen = gen_avx512f_blendmv8df;
21069 break;
21070 case V16SFmode:
21071 gen = gen_avx512f_blendmv16sf;
21072 break;
21074 default:
21075 break;
21078 if (gen != NULL)
21080 emit_insn (gen (d, op_false, op_true, cmp));
21081 if (d != dest)
21082 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21084 else
21086 op_true = force_reg (mode, op_true);
21088 t2 = gen_reg_rtx (mode);
21089 if (optimize)
21090 t3 = gen_reg_rtx (mode);
21091 else
21092 t3 = dest;
21094 x = gen_rtx_AND (mode, op_true, cmp);
21095 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21097 x = gen_rtx_NOT (mode, cmp);
21098 x = gen_rtx_AND (mode, x, op_false);
21099 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21101 x = gen_rtx_IOR (mode, t3, t2);
21102 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
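/* Illustrative aside (not part of GCC): the fallback emitted just above is the
   classic mask blend dest = (cmp & op_true) | (~cmp & op_false), which works
   because CMP is all-ones or all-zeros per element.  A scalar sketch with a
   hypothetical name:  */
static unsigned int
sketch_mask_blend (unsigned int cmp, unsigned int op_true, unsigned int op_false)
{
  /* CMP is assumed to be 0xffffffff or 0 for the element.  */
  return (cmp & op_true) | (~cmp & op_false);
}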
21107 /* Expand a floating-point conditional move. Return true if successful. */
21109 bool
21110 ix86_expand_fp_movcc (rtx operands[])
21112 enum machine_mode mode = GET_MODE (operands[0]);
21113 enum rtx_code code = GET_CODE (operands[1]);
21114 rtx tmp, compare_op;
21115 rtx op0 = XEXP (operands[1], 0);
21116 rtx op1 = XEXP (operands[1], 1);
21118 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21120 enum machine_mode cmode;
21122 /* Since we've no cmove for sse registers, don't force bad register
21123 allocation just to gain access to it. Deny movcc when the
21124 comparison mode doesn't match the move mode. */
21125 cmode = GET_MODE (op0);
21126 if (cmode == VOIDmode)
21127 cmode = GET_MODE (op1);
21128 if (cmode != mode)
21129 return false;
21131 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21132 if (code == UNKNOWN)
21133 return false;
21135 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21136 operands[2], operands[3]))
21137 return true;
21139 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21140 operands[2], operands[3]);
21141 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21142 return true;
21145 if (GET_MODE (op0) == TImode
21146 || (GET_MODE (op0) == DImode
21147 && !TARGET_64BIT))
21148 return false;
21150 /* The floating point conditional move instructions don't directly
21151 support conditions resulting from a signed integer comparison. */
21153 compare_op = ix86_expand_compare (code, op0, op1);
21154 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21156 tmp = gen_reg_rtx (QImode);
21157 ix86_expand_setcc (tmp, code, op0, op1);
21159 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21162 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21163 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21164 operands[2], operands[3])));
21166 return true;
21169 /* Expand a floating-point vector conditional move; a vcond operation
21170 rather than a movcc operation. */
21172 bool
21173 ix86_expand_fp_vcond (rtx operands[])
21175 enum rtx_code code = GET_CODE (operands[3]);
21176 rtx cmp;
21178 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21179 &operands[4], &operands[5]);
21180 if (code == UNKNOWN)
21182 rtx temp;
21183 switch (GET_CODE (operands[3]))
21185 case LTGT:
21186 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21187 operands[5], operands[0], operands[0]);
21188 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21189 operands[5], operands[1], operands[2]);
21190 code = AND;
21191 break;
21192 case UNEQ:
21193 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21194 operands[5], operands[0], operands[0]);
21195 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21196 operands[5], operands[1], operands[2]);
21197 code = IOR;
21198 break;
21199 default:
21200 gcc_unreachable ();
21202 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21203 OPTAB_DIRECT);
21204 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21205 return true;
21208 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21209 operands[5], operands[1], operands[2]))
21210 return true;
21212 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21213 operands[1], operands[2]);
21214 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21215 return true;
21218 /* Expand a signed/unsigned integral vector conditional move. */
21220 bool
21221 ix86_expand_int_vcond (rtx operands[])
21223 enum machine_mode data_mode = GET_MODE (operands[0]);
21224 enum machine_mode mode = GET_MODE (operands[4]);
21225 enum rtx_code code = GET_CODE (operands[3]);
21226 bool negate = false;
21227 rtx x, cop0, cop1;
21229 cop0 = operands[4];
21230 cop1 = operands[5];
21232 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21233 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21234 if ((code == LT || code == GE)
21235 && data_mode == mode
21236 && cop1 == CONST0_RTX (mode)
21237 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21238 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21239 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21240 && (GET_MODE_SIZE (data_mode) == 16
21241 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21243 rtx negop = operands[2 - (code == LT)];
21244 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21245 if (negop == CONST1_RTX (data_mode))
21247 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21248 operands[0], 1, OPTAB_DIRECT);
21249 if (res != operands[0])
21250 emit_move_insn (operands[0], res);
21251 return true;
21253 else if (GET_MODE_INNER (data_mode) != DImode
21254 && vector_all_ones_operand (negop, data_mode))
21256 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21257 operands[0], 0, OPTAB_DIRECT);
21258 if (res != operands[0])
21259 emit_move_insn (operands[0], res);
21260 return true;
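/* Illustrative aside (not part of GCC): the two shift-based forms used in the
   optimization above, written out for one 32-bit element.  Helper names are
   hypothetical; right-shifting a negative value is implementation-defined in
   ISO C, but the expander relies on the target's arithmetic-shift behaviour.  */
static int
sketch_lt0_all_ones (int x)
{
  /* x < 0 ? -1 : 0 == arithmetic right shift of the sign bit across the word.  */
  return x >> 31;
}

static unsigned int
sketch_lt0_one (int x)
{
  /* x < 0 ? 1 : 0 == logical right shift of the sign bit.  */
  return (unsigned int) x >> 31;
}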
21264 if (!nonimmediate_operand (cop1, mode))
21265 cop1 = force_reg (mode, cop1);
21266 if (!general_operand (operands[1], data_mode))
21267 operands[1] = force_reg (data_mode, operands[1]);
21268 if (!general_operand (operands[2], data_mode))
21269 operands[2] = force_reg (data_mode, operands[2]);
21271 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21272 if (TARGET_XOP
21273 && (mode == V16QImode || mode == V8HImode
21274 || mode == V4SImode || mode == V2DImode))
21276 else
21278 /* Canonicalize the comparison to EQ, GT, GTU. */
21279 switch (code)
21281 case EQ:
21282 case GT:
21283 case GTU:
21284 break;
21286 case NE:
21287 case LE:
21288 case LEU:
21289 code = reverse_condition (code);
21290 negate = true;
21291 break;
21293 case GE:
21294 case GEU:
21295 code = reverse_condition (code);
21296 negate = true;
21297 /* FALLTHRU */
21299 case LT:
21300 case LTU:
21301 code = swap_condition (code);
21302 x = cop0, cop0 = cop1, cop1 = x;
21303 break;
21305 default:
21306 gcc_unreachable ();
21309 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21310 if (mode == V2DImode)
21312 switch (code)
21314 case EQ:
21315 /* SSE4.1 supports EQ. */
21316 if (!TARGET_SSE4_1)
21317 return false;
21318 break;
21320 case GT:
21321 case GTU:
21322 /* SSE4.2 supports GT/GTU. */
21323 if (!TARGET_SSE4_2)
21324 return false;
21325 break;
21327 default:
21328 gcc_unreachable ();
21332 /* Unsigned parallel compare is not supported by the hardware.
21333 Play some tricks to turn this into a signed comparison, or into
21334 an equality test against 0. */
21335 if (code == GTU)
21337 cop0 = force_reg (mode, cop0);
21339 switch (mode)
21341 case V16SImode:
21342 case V8DImode:
21343 case V8SImode:
21344 case V4DImode:
21345 case V4SImode:
21346 case V2DImode:
21348 rtx t1, t2, mask;
21349 rtx (*gen_sub3) (rtx, rtx, rtx);
21351 switch (mode)
21353 case V16SImode: gen_sub3 = gen_subv16si3; break;
21354 case V8DImode: gen_sub3 = gen_subv8di3; break;
21355 case V8SImode: gen_sub3 = gen_subv8si3; break;
21356 case V4DImode: gen_sub3 = gen_subv4di3; break;
21357 case V4SImode: gen_sub3 = gen_subv4si3; break;
21358 case V2DImode: gen_sub3 = gen_subv2di3; break;
21359 default:
21360 gcc_unreachable ();
21362 /* Subtract (-(INT MAX) - 1) from both operands to make
21363 them signed. */
21364 mask = ix86_build_signbit_mask (mode, true, false);
21365 t1 = gen_reg_rtx (mode);
21366 emit_insn (gen_sub3 (t1, cop0, mask));
21368 t2 = gen_reg_rtx (mode);
21369 emit_insn (gen_sub3 (t2, cop1, mask));
21371 cop0 = t1;
21372 cop1 = t2;
21373 code = GT;
21375 break;
21377 case V64QImode:
21378 case V32HImode:
21379 case V32QImode:
21380 case V16HImode:
21381 case V16QImode:
21382 case V8HImode:
21383 /* Perform a parallel unsigned saturating subtraction. */
21384 x = gen_reg_rtx (mode);
21385 emit_insn (gen_rtx_SET (VOIDmode, x,
21386 gen_rtx_US_MINUS (mode, cop0, cop1)));
21388 cop0 = x;
21389 cop1 = CONST0_RTX (mode);
21390 code = EQ;
21391 negate = !negate;
21392 break;
21394 default:
21395 gcc_unreachable ();
21400 /* Allow the comparison to be done in one mode, but the movcc to
21401 happen in another mode. */
21402 if (data_mode == mode)
21404 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21405 operands[1+negate], operands[2-negate]);
21407 else
21409 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21410 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21411 operands[1+negate], operands[2-negate]);
21412 if (GET_MODE (x) == mode)
21413 x = gen_lowpart (data_mode, x);
21416 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21417 operands[2-negate]);
21418 return true;
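/* Illustrative aside (not part of GCC): the two tricks used above for GTU,
   modelled on scalars.  Names are hypothetical, and the unsigned-to-signed
   cast assumes the target's two's-complement reinterpretation.  */
static int
sketch_unsigned_gt_via_signed (unsigned int a, unsigned int b)
{
  /* Flipping the sign bit (equivalently, subtracting INT_MIN) maps unsigned
     order onto signed order, so the signed PCMPGT can be used.  */
  int sa = (int) (a ^ 0x80000000u);
  int sb = (int) (b ^ 0x80000000u);
  return sa > sb;            /* equals (a > b) for all unsigned a, b */
}

static int
sketch_unsigned_gt_via_ussub (unsigned char a, unsigned char b)
{
  /* Unsigned saturating subtract is nonzero exactly when a > b, which is
     why the expander compares the result for equality with zero and then
     negates the mask.  */
  unsigned char d = (a > b) ? (unsigned char) (a - b) : 0;
  return d != 0;
}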
21421 static bool
21422 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21424 enum machine_mode mode = GET_MODE (op0);
21425 switch (mode)
21427 /* There is no byte version of vpermi2. So we use vpermi2w. */
21428 case V64QImode:
21429 if (!TARGET_AVX512BW)
21430 return false;
21431 rtx mask_lowpart, op0_lowpart, op1_lowpart;
21432 rtx perm_lo, perm_hi, tmp, res_lo, tmp2, res_hi;
21434 mask_lowpart = gen_lowpart (V32HImode, force_reg (V64QImode, mask));
21435 op0_lowpart = gen_lowpart (V32HImode, op0);
21436 op1_lowpart = gen_lowpart (V32HImode, op1);
21437 tmp = gen_reg_rtx (V32HImode);
21438 tmp2 = gen_reg_rtx (V32HImode);
21439 perm_lo = gen_reg_rtx (V32HImode);
21440 perm_hi = gen_reg_rtx (V32HImode);
21441 res_lo = gen_reg_rtx (V32HImode);
21442 res_hi = gen_reg_rtx (V32HImode);
21444 emit_insn (gen_ashlv32hi3 (tmp, mask_lowpart, GEN_INT (8)));
21445 emit_insn (gen_ashrv32hi3 (perm_lo, tmp, GEN_INT (9)));
21446 emit_insn (gen_ashrv32hi3 (perm_hi, mask_lowpart, GEN_INT (9)));
21447 emit_insn (gen_avx512bw_vpermi2varv32hi3 (res_lo, op0_lowpart,
21448 perm_lo, op1_lowpart));
21449 emit_insn (gen_avx512bw_vpermi2varv32hi3 (tmp2, op0_lowpart,
21450 perm_hi, op1_lowpart));
21451 emit_insn (gen_ashlv32hi3 (res_hi, tmp2, GEN_INT (8)));
21452 emit_insn (gen_avx512bw_blendmv64qi (target, gen_lowpart (V64QImode, res_lo),
21453 gen_lowpart (V64QImode, res_hi),
21454 force_reg (DImode, GEN_INT (0xAAAAAAAAAAAAAAAALL))));
21455 return true;
21456 case V8HImode:
21457 if (!TARGET_AVX512VL)
21458 return false;
21459 emit_insn (gen_avx512vl_vpermi2varv8hi3 (target, op0,
21460 force_reg (V8HImode, mask), op1));
21461 return true;
21462 case V16HImode:
21463 if (!TARGET_AVX512VL)
21464 return false;
21465 emit_insn (gen_avx512vl_vpermi2varv16hi3 (target, op0,
21466 force_reg (V16HImode, mask), op1));
21467 return true;
21468 case V32HImode:
21469 emit_insn (gen_avx512bw_vpermi2varv32hi3 (target, op0,
21470 force_reg (V32HImode, mask), op1));
21471 return true;
21472 case V4SImode:
21473 if (!TARGET_AVX512VL)
21474 return false;
21475 emit_insn (gen_avx512vl_vpermi2varv4si3 (target, op0,
21476 force_reg (V4SImode, mask), op1));
21477 return true;
21478 case V8SImode:
21479 if (!TARGET_AVX512VL)
21480 return false;
21481 emit_insn (gen_avx512vl_vpermi2varv8si3 (target, op0,
21482 force_reg (V8SImode, mask), op1));
21483 return true;
21484 case V16SImode:
21485 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21486 force_reg (V16SImode, mask),
21487 op1));
21488 return true;
21489 case V4SFmode:
21490 if (!TARGET_AVX512VL)
21491 return false;
21492 emit_insn (gen_avx512vl_vpermi2varv4sf3 (target, op0,
21493 force_reg (V4SImode, mask), op1));
21494 return true;
21495 case V8SFmode:
21496 if (!TARGET_AVX512VL)
21497 return false;
21498 emit_insn (gen_avx512vl_vpermi2varv8sf3 (target, op0,
21499 force_reg (V8SImode, mask), op1));
21500 return true;
21501 case V16SFmode:
21502 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21503 force_reg (V16SImode, mask),
21504 op1));
21505 return true;
21506 case V2DImode:
21507 if (!TARGET_AVX512VL)
21508 return false;
21509 emit_insn (gen_avx512vl_vpermi2varv2di3 (target, op0,
21510 force_reg (V2DImode, mask), op1));
21511 return true;
21512 case V4DImode:
21513 if (!TARGET_AVX512VL)
21514 return false;
21515 emit_insn (gen_avx512vl_vpermi2varv4di3 (target, op0,
21516 force_reg (V4DImode, mask), op1));
21517 return true;
21518 case V8DImode:
21519 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21520 force_reg (V8DImode, mask), op1));
21521 return true;
21522 case V2DFmode:
21523 if (!TARGET_AVX512VL)
21524 return false;
21525 emit_insn (gen_avx512vl_vpermi2varv2df3 (target, op0,
21526 force_reg (V2DImode, mask), op1));
21527 return true;
21528 case V4DFmode:
21529 if (!TARGET_AVX512VL)
21530 return false;
21531 emit_insn (gen_avx512vl_vpermi2varv4df3 (target, op0,
21532 force_reg (V4DImode, mask), op1));
21533 return true;
21534 case V8DFmode:
21535 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21536 force_reg (V8DImode, mask), op1));
21537 return true;
21538 default:
21539 return false;
21543 /* Expand a variable vector permutation. */
21545 void
21546 ix86_expand_vec_perm (rtx operands[])
21548 rtx target = operands[0];
21549 rtx op0 = operands[1];
21550 rtx op1 = operands[2];
21551 rtx mask = operands[3];
21552 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21553 enum machine_mode mode = GET_MODE (op0);
21554 enum machine_mode maskmode = GET_MODE (mask);
21555 int w, e, i;
21556 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21558 /* Number of elements in the vector. */
21559 w = GET_MODE_NUNITS (mode);
21560 e = GET_MODE_UNIT_SIZE (mode);
21561 gcc_assert (w <= 64);
21563 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21564 return;
21566 if (TARGET_AVX2)
21568 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21570 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21571 a constant shuffle operand.  With a tiny bit of effort we can
21572 use VPERMD instead.  A re-interpretation stall for V4DFmode is
21573 unfortunate but there's no avoiding it.
21574 Similarly, for V16HImode we don't have instructions for variable
21575 shuffling, while for V32QImode we can, after preparing suitable
21576 masks, use vpshufb; vpshufb; vpermq; vpor. */
21578 if (mode == V16HImode)
21580 maskmode = mode = V32QImode;
21581 w = 32;
21582 e = 1;
21584 else
21586 maskmode = mode = V8SImode;
21587 w = 8;
21588 e = 4;
21590 t1 = gen_reg_rtx (maskmode);
21592 /* Replicate the low bits of the V4DImode mask into V8SImode:
21593 mask = { A B C D }
21594 t1 = { A A B B C C D D }. */
21595 for (i = 0; i < w / 2; ++i)
21596 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21597 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21598 vt = force_reg (maskmode, vt);
21599 mask = gen_lowpart (maskmode, mask);
21600 if (maskmode == V8SImode)
21601 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21602 else
21603 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21605 /* Multiply the shuffle indices by two. */
21606 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21607 OPTAB_DIRECT);
21609 /* Add one to the odd shuffle indices:
21610 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21611 for (i = 0; i < w / 2; ++i)
21613 vec[i * 2] = const0_rtx;
21614 vec[i * 2 + 1] = const1_rtx;
21616 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21617 vt = validize_mem (force_const_mem (maskmode, vt));
21618 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21619 OPTAB_DIRECT);
21621 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21622 operands[3] = mask = t1;
21623 target = gen_reg_rtx (mode);
21624 op0 = gen_lowpart (mode, op0);
21625 op1 = gen_lowpart (mode, op1);
21628 switch (mode)
21630 case V8SImode:
21631 /* The VPERMD and VPERMPS instructions already properly ignore
21632 the high bits of the shuffle elements. No need for us to
21633 perform an AND ourselves. */
21634 if (one_operand_shuffle)
21636 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21637 if (target != operands[0])
21638 emit_move_insn (operands[0],
21639 gen_lowpart (GET_MODE (operands[0]), target));
21641 else
21643 t1 = gen_reg_rtx (V8SImode);
21644 t2 = gen_reg_rtx (V8SImode);
21645 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21646 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21647 goto merge_two;
21649 return;
21651 case V8SFmode:
21652 mask = gen_lowpart (V8SImode, mask);
21653 if (one_operand_shuffle)
21654 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21655 else
21657 t1 = gen_reg_rtx (V8SFmode);
21658 t2 = gen_reg_rtx (V8SFmode);
21659 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21660 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21661 goto merge_two;
21663 return;
21665 case V4SImode:
21666 /* By combining the two 128-bit input vectors into one 256-bit
21667 input vector, we can use VPERMD and VPERMPS for the full
21668 two-operand shuffle. */
21669 t1 = gen_reg_rtx (V8SImode);
21670 t2 = gen_reg_rtx (V8SImode);
21671 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21672 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21673 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21674 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21675 return;
21677 case V4SFmode:
21678 t1 = gen_reg_rtx (V8SFmode);
21679 t2 = gen_reg_rtx (V8SImode);
21680 mask = gen_lowpart (V4SImode, mask);
21681 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21682 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21683 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21684 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21685 return;
21687 case V32QImode:
21688 t1 = gen_reg_rtx (V32QImode);
21689 t2 = gen_reg_rtx (V32QImode);
21690 t3 = gen_reg_rtx (V32QImode);
21691 vt2 = GEN_INT (-128);
21692 for (i = 0; i < 32; i++)
21693 vec[i] = vt2;
21694 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21695 vt = force_reg (V32QImode, vt);
21696 for (i = 0; i < 32; i++)
21697 vec[i] = i < 16 ? vt2 : const0_rtx;
21698 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21699 vt2 = force_reg (V32QImode, vt2);
21700 /* From mask create two adjusted masks, which contain the same
21701 bits as mask in the low 7 bits of each vector element.
21702 The first mask will have the most significant bit clear
21703 if it requests element from the same 128-bit lane
21704 and MSB set if it requests element from the other 128-bit lane.
21705 The second mask will have the opposite values of the MSB,
21706 and additionally will have its 128-bit lanes swapped.
21707 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21708 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21709 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21710 stands for the other 12 bytes. */
21711 /* The bit that says whether an element comes from the same lane or the
21712 other lane is bit 4, so shift it up by 3 to the MSB position. */
21713 t5 = gen_reg_rtx (V4DImode);
21714 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21715 GEN_INT (3)));
21716 /* Clear MSB bits from the mask just in case it had them set. */
21717 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21718 /* After this t1 will have MSB set for elements from other lane. */
21719 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21720 /* Clear bits other than MSB. */
21721 emit_insn (gen_andv32qi3 (t1, t1, vt));
21722 /* Or in the lower bits from mask into t3. */
21723 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21724 /* And invert MSB bits in t1, so MSB is set for elements from the same
21725 lane. */
21726 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21727 /* Swap 128-bit lanes in t3. */
21728 t6 = gen_reg_rtx (V4DImode);
21729 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21730 const2_rtx, GEN_INT (3),
21731 const0_rtx, const1_rtx));
21732 /* And or in the lower bits from mask into t1. */
21733 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21734 if (one_operand_shuffle)
21736 /* Each of these shuffles will put 0s in places where an
21737 element from the other 128-bit lane is needed; otherwise it
21738 will shuffle in the requested value. */
21739 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21740 gen_lowpart (V32QImode, t6)));
21741 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21742 /* For t3 the 128-bit lanes are swapped again. */
21743 t7 = gen_reg_rtx (V4DImode);
21744 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21745 const2_rtx, GEN_INT (3),
21746 const0_rtx, const1_rtx));
21747 /* And ORing both together yields the result. */
21748 emit_insn (gen_iorv32qi3 (target, t1,
21749 gen_lowpart (V32QImode, t7)));
21750 if (target != operands[0])
21751 emit_move_insn (operands[0],
21752 gen_lowpart (GET_MODE (operands[0]), target));
21753 return;
21756 t4 = gen_reg_rtx (V32QImode);
21757 /* Similar to the one_operand_shuffle code above, just
21758 repeated twice, once for each operand.  The merge_two:
21759 code below will merge the two results together. */
21760 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21761 gen_lowpart (V32QImode, t6)));
21762 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21763 gen_lowpart (V32QImode, t6)));
21764 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21765 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21766 t7 = gen_reg_rtx (V4DImode);
21767 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21768 const2_rtx, GEN_INT (3),
21769 const0_rtx, const1_rtx));
21770 t8 = gen_reg_rtx (V4DImode);
21771 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21772 const2_rtx, GEN_INT (3),
21773 const0_rtx, const1_rtx));
21774 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21775 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21776 t1 = t4;
21777 t2 = t3;
21778 goto merge_two;
21780 default:
21781 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21782 break;
21786 if (TARGET_XOP)
21788 /* The XOP VPPERM insn supports three inputs. By ignoring the
21789 one_operand_shuffle special case, we avoid creating another
21790 set of constant vectors in memory. */
21791 one_operand_shuffle = false;
21793 /* mask = mask & {2*w-1, ...} */
21794 vt = GEN_INT (2*w - 1);
21796 else
21798 /* mask = mask & {w-1, ...} */
21799 vt = GEN_INT (w - 1);
21802 for (i = 0; i < w; i++)
21803 vec[i] = vt;
21804 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21805 mask = expand_simple_binop (maskmode, AND, mask, vt,
21806 NULL_RTX, 0, OPTAB_DIRECT);
21808 /* For non-QImode operations, convert the word permutation control
21809 into a byte permutation control. */
21810 if (mode != V16QImode)
21812 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21813 GEN_INT (exact_log2 (e)),
21814 NULL_RTX, 0, OPTAB_DIRECT);
21816 /* Convert mask to vector of chars. */
21817 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21819 /* Replicate each of the input bytes into byte positions:
21820 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21821 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21822 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21823 for (i = 0; i < 16; ++i)
21824 vec[i] = GEN_INT (i/e * e);
21825 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21826 vt = validize_mem (force_const_mem (V16QImode, vt));
21827 if (TARGET_XOP)
21828 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21829 else
21830 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21832 /* Convert it into the byte positions by doing
21833 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21834 for (i = 0; i < 16; ++i)
21835 vec[i] = GEN_INT (i % e);
21836 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21837 vt = validize_mem (force_const_mem (V16QImode, vt));
21838 emit_insn (gen_addv16qi3 (mask, mask, vt));
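/* Illustrative aside (not part of GCC): what the shift/replicate/add sequence
   above computes.  A word permutation control for elements of size E is
   turned into a pshufb byte control: element index i becomes the E
   consecutive byte indices i*E, i*E+1, ..., i*E+E-1.  A scalar sketch with
   hypothetical names:  */
static void
sketch_word_to_byte_control (const unsigned char *word_idx, int e,
			     unsigned char *byte_idx /* 16 bytes */)
{
  int i;
  for (i = 0; i < 16; i++)
    /* i / e selects the controlling element, i % e the byte within it.  */
    byte_idx[i] = (unsigned char) (word_idx[i / e] * e + i % e);
}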
21841 /* The actual shuffle operations all operate on V16QImode. */
21842 op0 = gen_lowpart (V16QImode, op0);
21843 op1 = gen_lowpart (V16QImode, op1);
21845 if (TARGET_XOP)
21847 if (GET_MODE (target) != V16QImode)
21848 target = gen_reg_rtx (V16QImode);
21849 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21850 if (target != operands[0])
21851 emit_move_insn (operands[0],
21852 gen_lowpart (GET_MODE (operands[0]), target));
21854 else if (one_operand_shuffle)
21856 if (GET_MODE (target) != V16QImode)
21857 target = gen_reg_rtx (V16QImode);
21858 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21859 if (target != operands[0])
21860 emit_move_insn (operands[0],
21861 gen_lowpart (GET_MODE (operands[0]), target));
21863 else
21865 rtx xops[6];
21866 bool ok;
21868 /* Shuffle the two input vectors independently. */
21869 t1 = gen_reg_rtx (V16QImode);
21870 t2 = gen_reg_rtx (V16QImode);
21871 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21872 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21874 merge_two:
21875 /* Then merge them together. The key is whether any given control
21876 element contained a bit set that indicates the second word. */
21877 mask = operands[3];
21878 vt = GEN_INT (w);
21879 if (maskmode == V2DImode && !TARGET_SSE4_1)
21881 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21882 more shuffle to convert the V2DI input mask into a V4SI
21883 input mask, at which point the masking that ix86_expand_int_vcond
21884 performs will work as desired. */
21885 rtx t3 = gen_reg_rtx (V4SImode);
21886 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21887 const0_rtx, const0_rtx,
21888 const2_rtx, const2_rtx));
21889 mask = t3;
21890 maskmode = V4SImode;
21891 e = w = 4;
21894 for (i = 0; i < w; i++)
21895 vec[i] = vt;
21896 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21897 vt = force_reg (maskmode, vt);
21898 mask = expand_simple_binop (maskmode, AND, mask, vt,
21899 NULL_RTX, 0, OPTAB_DIRECT);
21901 if (GET_MODE (target) != mode)
21902 target = gen_reg_rtx (mode);
21903 xops[0] = target;
21904 xops[1] = gen_lowpart (mode, t2);
21905 xops[2] = gen_lowpart (mode, t1);
21906 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21907 xops[4] = mask;
21908 xops[5] = vt;
21909 ok = ix86_expand_int_vcond (xops);
21910 gcc_assert (ok);
21911 if (target != operands[0])
21912 emit_move_insn (operands[0],
21913 gen_lowpart (GET_MODE (operands[0]), target));
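/* Illustrative aside (not part of GCC): the merge_two step above in scalar
   form.  Each input vector is shuffled independently with the same control,
   and the final result picks, per element, the shuffle of the second input
   whenever the control index had the "second vector" bit (W) set.  W is the
   element count and is assumed to be a power of two; names are hypothetical.  */
static void
sketch_two_operand_shuffle (const int *op0, const int *op1,
			    const unsigned char *sel, int w, int *dest)
{
  int i;
  for (i = 0; i < w; i++)
    {
      int t1 = op0[sel[i] & (w - 1)];    /* shuffle of the first input  */
      int t2 = op1[sel[i] & (w - 1)];    /* shuffle of the second input */
      dest[i] = (sel[i] & w) ? t2 : t1;  /* merge on the W bit          */
    }
}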
21917 /* Unpack SRC into the next wider integer vector type held in DEST.
21918 UNSIGNED_P is true if we should do zero extension, else sign extension.
21919 HIGH_P is true if we want the N/2 high elements, else the low elements. */
21921 void
21922 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21924 enum machine_mode imode = GET_MODE (src);
21925 rtx tmp;
21927 if (TARGET_SSE4_1)
21929 rtx (*unpack)(rtx, rtx);
21930 rtx (*extract)(rtx, rtx) = NULL;
21931 enum machine_mode halfmode = BLKmode;
21933 switch (imode)
21935 case V64QImode:
21936 if (unsigned_p)
21937 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
21938 else
21939 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
21940 halfmode = V32QImode;
21941 extract
21942 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
21943 break;
21944 case V32QImode:
21945 if (unsigned_p)
21946 unpack = gen_avx2_zero_extendv16qiv16hi2;
21947 else
21948 unpack = gen_avx2_sign_extendv16qiv16hi2;
21949 halfmode = V16QImode;
21950 extract
21951 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21952 break;
21953 case V32HImode:
21954 if (unsigned_p)
21955 unpack = gen_avx512f_zero_extendv16hiv16si2;
21956 else
21957 unpack = gen_avx512f_sign_extendv16hiv16si2;
21958 halfmode = V16HImode;
21959 extract
21960 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21961 break;
21962 case V16HImode:
21963 if (unsigned_p)
21964 unpack = gen_avx2_zero_extendv8hiv8si2;
21965 else
21966 unpack = gen_avx2_sign_extendv8hiv8si2;
21967 halfmode = V8HImode;
21968 extract
21969 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21970 break;
21971 case V16SImode:
21972 if (unsigned_p)
21973 unpack = gen_avx512f_zero_extendv8siv8di2;
21974 else
21975 unpack = gen_avx512f_sign_extendv8siv8di2;
21976 halfmode = V8SImode;
21977 extract
21978 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21979 break;
21980 case V8SImode:
21981 if (unsigned_p)
21982 unpack = gen_avx2_zero_extendv4siv4di2;
21983 else
21984 unpack = gen_avx2_sign_extendv4siv4di2;
21985 halfmode = V4SImode;
21986 extract
21987 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21988 break;
21989 case V16QImode:
21990 if (unsigned_p)
21991 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21992 else
21993 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21994 break;
21995 case V8HImode:
21996 if (unsigned_p)
21997 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21998 else
21999 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22000 break;
22001 case V4SImode:
22002 if (unsigned_p)
22003 unpack = gen_sse4_1_zero_extendv2siv2di2;
22004 else
22005 unpack = gen_sse4_1_sign_extendv2siv2di2;
22006 break;
22007 default:
22008 gcc_unreachable ();
22011 if (GET_MODE_SIZE (imode) >= 32)
22013 tmp = gen_reg_rtx (halfmode);
22014 emit_insn (extract (tmp, src));
22016 else if (high_p)
22018 /* Shift higher 8 bytes to lower 8 bytes. */
22019 tmp = gen_reg_rtx (V1TImode);
22020 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22021 GEN_INT (64)));
22022 tmp = gen_lowpart (imode, tmp);
22024 else
22025 tmp = src;
22027 emit_insn (unpack (dest, tmp));
22029 else
22031 rtx (*unpack)(rtx, rtx, rtx);
22033 switch (imode)
22035 case V16QImode:
22036 if (high_p)
22037 unpack = gen_vec_interleave_highv16qi;
22038 else
22039 unpack = gen_vec_interleave_lowv16qi;
22040 break;
22041 case V8HImode:
22042 if (high_p)
22043 unpack = gen_vec_interleave_highv8hi;
22044 else
22045 unpack = gen_vec_interleave_lowv8hi;
22046 break;
22047 case V4SImode:
22048 if (high_p)
22049 unpack = gen_vec_interleave_highv4si;
22050 else
22051 unpack = gen_vec_interleave_lowv4si;
22052 break;
22053 default:
22054 gcc_unreachable ();
22057 if (unsigned_p)
22058 tmp = force_reg (imode, CONST0_RTX (imode));
22059 else
22060 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22061 src, pc_rtx, pc_rtx);
22063 rtx tmp2 = gen_reg_rtx (imode);
22064 emit_insn (unpack (tmp2, src, tmp));
22065 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
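/* Illustrative aside (not part of GCC): the pre-SSE4.1 widening used above,
   modelled on one 16-bit element.  Interleaving the source with zero gives a
   zero extension; interleaving with a lane holding the element's sign mask
   (all-ones when the element is negative, produced by the GT compare against
   zero) gives a sign extension.  Hypothetical name:  */
static unsigned int
sketch_widen_element (short x, int unsigned_p)
{
  unsigned short lo = (unsigned short) x;
  unsigned short hi = unsigned_p ? 0 : (unsigned short) (x < 0 ? -1 : 0);
  /* The interleave places LO in the low half and HI in the high half;
     the result is the bit pattern of the widened element.  */
  return (unsigned int) lo | ((unsigned int) hi << 16);
}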
22069 /* Expand conditional increment or decrement using adc/sbb instructions.
22070 The default case using setcc followed by a conditional move can be
22071 done by generic code. */
22072 bool
22073 ix86_expand_int_addcc (rtx operands[])
22075 enum rtx_code code = GET_CODE (operands[1]);
22076 rtx flags;
22077 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22078 rtx compare_op;
22079 rtx val = const0_rtx;
22080 bool fpcmp = false;
22081 enum machine_mode mode;
22082 rtx op0 = XEXP (operands[1], 0);
22083 rtx op1 = XEXP (operands[1], 1);
22085 if (operands[3] != const1_rtx
22086 && operands[3] != constm1_rtx)
22087 return false;
22088 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22089 return false;
22090 code = GET_CODE (compare_op);
22092 flags = XEXP (compare_op, 0);
22094 if (GET_MODE (flags) == CCFPmode
22095 || GET_MODE (flags) == CCFPUmode)
22097 fpcmp = true;
22098 code = ix86_fp_compare_code_to_integer (code);
22101 if (code != LTU)
22103 val = constm1_rtx;
22104 if (fpcmp)
22105 PUT_CODE (compare_op,
22106 reverse_condition_maybe_unordered
22107 (GET_CODE (compare_op)));
22108 else
22109 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22112 mode = GET_MODE (operands[0]);
22114 /* Construct either adc or sbb insn. */
22115 if ((code == LTU) == (operands[3] == constm1_rtx))
22117 switch (mode)
22119 case QImode:
22120 insn = gen_subqi3_carry;
22121 break;
22122 case HImode:
22123 insn = gen_subhi3_carry;
22124 break;
22125 case SImode:
22126 insn = gen_subsi3_carry;
22127 break;
22128 case DImode:
22129 insn = gen_subdi3_carry;
22130 break;
22131 default:
22132 gcc_unreachable ();
22135 else
22137 switch (mode)
22139 case QImode:
22140 insn = gen_addqi3_carry;
22141 break;
22142 case HImode:
22143 insn = gen_addhi3_carry;
22144 break;
22145 case SImode:
22146 insn = gen_addsi3_carry;
22147 break;
22148 case DImode:
22149 insn = gen_adddi3_carry;
22150 break;
22151 default:
22152 gcc_unreachable ();
22155 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22157 return true;
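/* Illustrative aside (not part of GCC): the scalar effect of the adc/sbb
   forms constructed above.  After a comparison leaves its result in the carry
   flag, "adc dest, 0" adds the carry and "sbb dest, 0" subtracts it, giving a
   branch-free conditional increment or decrement.  Hypothetical name:  */
static unsigned int
sketch_conditional_increment (unsigned int dest, unsigned int a, unsigned int b)
{
  /* (a < b) plays the role of the carry produced by "cmp a, b".  */
  return dest + (a < b ? 1u : 0u);
}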
22161 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22162 but works for floating-point parameters and non-offsettable memories.
22163 For pushes, it returns just stack offsets; the values will be saved
22164 in the right order. At most four parts are generated. */
22166 static int
22167 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22169 int size;
22171 if (!TARGET_64BIT)
22172 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22173 else
22174 size = (GET_MODE_SIZE (mode) + 4) / 8;
22176 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22177 gcc_assert (size >= 2 && size <= 4);
22179 /* Optimize constant pool reference to immediates. This is used by fp
22180 moves, that force all constants to memory to allow combining. */
22181 if (MEM_P (operand) && MEM_READONLY_P (operand))
22183 rtx tmp = maybe_get_pool_constant (operand);
22184 if (tmp)
22185 operand = tmp;
22188 if (MEM_P (operand) && !offsettable_memref_p (operand))
22190 /* The only non-offsettable memories we handle are pushes. */
22191 int ok = push_operand (operand, VOIDmode);
22193 gcc_assert (ok);
22195 operand = copy_rtx (operand);
22196 PUT_MODE (operand, word_mode);
22197 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22198 return size;
22201 if (GET_CODE (operand) == CONST_VECTOR)
22203 enum machine_mode imode = int_mode_for_mode (mode);
22204 /* Caution: if we looked through a constant pool memory above,
22205 the operand may actually have a different mode now. That's
22206 ok, since we want to pun this all the way back to an integer. */
22207 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22208 gcc_assert (operand != NULL);
22209 mode = imode;
22212 if (!TARGET_64BIT)
22214 if (mode == DImode)
22215 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22216 else
22218 int i;
22220 if (REG_P (operand))
22222 gcc_assert (reload_completed);
22223 for (i = 0; i < size; i++)
22224 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22226 else if (offsettable_memref_p (operand))
22228 operand = adjust_address (operand, SImode, 0);
22229 parts[0] = operand;
22230 for (i = 1; i < size; i++)
22231 parts[i] = adjust_address (operand, SImode, 4 * i);
22233 else if (GET_CODE (operand) == CONST_DOUBLE)
22235 REAL_VALUE_TYPE r;
22236 long l[4];
22238 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22239 switch (mode)
22241 case TFmode:
22242 real_to_target (l, &r, mode);
22243 parts[3] = gen_int_mode (l[3], SImode);
22244 parts[2] = gen_int_mode (l[2], SImode);
22245 break;
22246 case XFmode:
22247 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22248 long double may not be 80-bit. */
22249 real_to_target (l, &r, mode);
22250 parts[2] = gen_int_mode (l[2], SImode);
22251 break;
22252 case DFmode:
22253 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22254 break;
22255 default:
22256 gcc_unreachable ();
22258 parts[1] = gen_int_mode (l[1], SImode);
22259 parts[0] = gen_int_mode (l[0], SImode);
22261 else
22262 gcc_unreachable ();
22265 else
22267 if (mode == TImode)
22268 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22269 if (mode == XFmode || mode == TFmode)
22271 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22272 if (REG_P (operand))
22274 gcc_assert (reload_completed);
22275 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22276 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22278 else if (offsettable_memref_p (operand))
22280 operand = adjust_address (operand, DImode, 0);
22281 parts[0] = operand;
22282 parts[1] = adjust_address (operand, upper_mode, 8);
22284 else if (GET_CODE (operand) == CONST_DOUBLE)
22286 REAL_VALUE_TYPE r;
22287 long l[4];
22289 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22290 real_to_target (l, &r, mode);
22292 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22293 if (HOST_BITS_PER_WIDE_INT >= 64)
22294 parts[0]
22295 = gen_int_mode
22296 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22297 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22298 DImode);
22299 else
22300 parts[0] = immed_double_const (l[0], l[1], DImode);
22302 if (upper_mode == SImode)
22303 parts[1] = gen_int_mode (l[2], SImode);
22304 else if (HOST_BITS_PER_WIDE_INT >= 64)
22305 parts[1]
22306 = gen_int_mode
22307 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22308 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22309 DImode);
22310 else
22311 parts[1] = immed_double_const (l[2], l[3], DImode);
22313 else
22314 gcc_unreachable ();
22318 return size;
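/* Illustrative aside (not part of GCC): the value-level effect of the
   splitting above for the common DImode-on-32-bit case.  The low part holds
   the low 32 bits and the high part the upper 32 bits.  Hypothetical name:  */
static void
sketch_split_di (unsigned long long x, unsigned int parts[2])
{
  parts[0] = (unsigned int) (x & 0xffffffffu);  /* low word  */
  parts[1] = (unsigned int) (x >> 32);          /* high word */
}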
22321 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22322 All required insns are emitted here; the operands array is used as
22323 scratch: operands 2 and up receive the destination parts and operands
22324 6 and up the source parts, in the order the moves are emitted. */
22326 void
22327 ix86_split_long_move (rtx operands[])
22329 rtx part[2][4];
22330 int nparts, i, j;
22331 int push = 0;
22332 int collisions = 0;
22333 enum machine_mode mode = GET_MODE (operands[0]);
22334 bool collisionparts[4];
22336 /* The DFmode expanders may ask us to move a double.
22337 For a 64-bit target this is a single move. By hiding that fact
22338 here we simplify the i386.md splitters. */
22339 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22341 /* Optimize constant pool reference to immediates. This is used by
22342 fp moves, that force all constants to memory to allow combining. */
22344 if (MEM_P (operands[1])
22345 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22346 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22347 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22348 if (push_operand (operands[0], VOIDmode))
22350 operands[0] = copy_rtx (operands[0]);
22351 PUT_MODE (operands[0], word_mode);
22353 else
22354 operands[0] = gen_lowpart (DImode, operands[0]);
22355 operands[1] = gen_lowpart (DImode, operands[1]);
22356 emit_move_insn (operands[0], operands[1]);
22357 return;
22360 /* The only non-offsettable memory we handle is push. */
22361 if (push_operand (operands[0], VOIDmode))
22362 push = 1;
22363 else
22364 gcc_assert (!MEM_P (operands[0])
22365 || offsettable_memref_p (operands[0]));
22367 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22368 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22370 /* When emitting a push, take care of source operands on the stack. */
22371 if (push && MEM_P (operands[1])
22372 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22374 rtx src_base = XEXP (part[1][nparts - 1], 0);
22376 /* Compensate for the stack decrement by 4. */
22377 if (!TARGET_64BIT && nparts == 3
22378 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22379 src_base = plus_constant (Pmode, src_base, 4);
22381 /* src_base refers to the stack pointer and is
22382 automatically decreased by emitted push. */
22383 for (i = 0; i < nparts; i++)
22384 part[1][i] = change_address (part[1][i],
22385 GET_MODE (part[1][i]), src_base);
22388 /* We need to do copy in the right order in case an address register
22389 of the source overlaps the destination. */
22390 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22392 rtx tmp;
22394 for (i = 0; i < nparts; i++)
22396 collisionparts[i]
22397 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22398 if (collisionparts[i])
22399 collisions++;
22402 /* Collision in the middle part can be handled by reordering. */
22403 if (collisions == 1 && nparts == 3 && collisionparts [1])
22405 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22406 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22408 else if (collisions == 1
22409 && nparts == 4
22410 && (collisionparts [1] || collisionparts [2]))
22412 if (collisionparts [1])
22414 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22415 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22417 else
22419 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22420 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22424 /* If there are more collisions, we can't handle it by reordering.
22425 Do an lea to the last part and use only one colliding move. */
22426 else if (collisions > 1)
22428 rtx base;
22430 collisions = 1;
22432 base = part[0][nparts - 1];
22434 /* Handle the case when the last part isn't valid for lea.
22435 Happens in 64-bit mode storing the 12-byte XFmode. */
22436 if (GET_MODE (base) != Pmode)
22437 base = gen_rtx_REG (Pmode, REGNO (base));
22439 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22440 part[1][0] = replace_equiv_address (part[1][0], base);
22441 for (i = 1; i < nparts; i++)
22443 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22444 part[1][i] = replace_equiv_address (part[1][i], tmp);
22449 if (push)
22451 if (!TARGET_64BIT)
22453 if (nparts == 3)
22455 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22456 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22457 stack_pointer_rtx, GEN_INT (-4)));
22458 emit_move_insn (part[0][2], part[1][2]);
22460 else if (nparts == 4)
22462 emit_move_insn (part[0][3], part[1][3]);
22463 emit_move_insn (part[0][2], part[1][2]);
22466 else
22468 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22469 a register, that is OK - we will just use the larger counterpart. We also
22470 retype memory - these come from an attempt to avoid a REX prefix on
22471 moving the second half of a TFmode value. */
22472 if (GET_MODE (part[1][1]) == SImode)
22474 switch (GET_CODE (part[1][1]))
22476 case MEM:
22477 part[1][1] = adjust_address (part[1][1], DImode, 0);
22478 break;
22480 case REG:
22481 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22482 break;
22484 default:
22485 gcc_unreachable ();
22488 if (GET_MODE (part[1][0]) == SImode)
22489 part[1][0] = part[1][1];
22492 emit_move_insn (part[0][1], part[1][1]);
22493 emit_move_insn (part[0][0], part[1][0]);
22494 return;
22497 /* Choose the correct order so as not to overwrite the source before it is copied. */
22498 if ((REG_P (part[0][0])
22499 && REG_P (part[1][1])
22500 && (REGNO (part[0][0]) == REGNO (part[1][1])
22501 || (nparts == 3
22502 && REGNO (part[0][0]) == REGNO (part[1][2]))
22503 || (nparts == 4
22504 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22505 || (collisions > 0
22506 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22508 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22510 operands[2 + i] = part[0][j];
22511 operands[6 + i] = part[1][j];
22514 else
22516 for (i = 0; i < nparts; i++)
22518 operands[2 + i] = part[0][i];
22519 operands[6 + i] = part[1][i];
22523 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22524 if (optimize_insn_for_size_p ())
22526 for (j = 0; j < nparts - 1; j++)
22527 if (CONST_INT_P (operands[6 + j])
22528 && operands[6 + j] != const0_rtx
22529 && REG_P (operands[2 + j]))
22530 for (i = j; i < nparts - 1; i++)
22531 if (CONST_INT_P (operands[7 + i])
22532 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22533 operands[7 + i] = operands[2 + j];
22536 for (i = 0; i < nparts; i++)
22537 emit_move_insn (operands[2 + i], operands[6 + i]);
22539 return;
22542 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22543 left shift by a constant, either using a single shift or
22544 a sequence of add instructions. */
22546 static void
22547 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22549 rtx (*insn)(rtx, rtx, rtx);
22551 if (count == 1
22552 || (count * ix86_cost->add <= ix86_cost->shift_const
22553 && !optimize_insn_for_size_p ()))
22555 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22556 while (count-- > 0)
22557 emit_insn (insn (operand, operand, operand));
22559 else
22561 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22562 emit_insn (insn (operand, operand, GEN_INT (count)));
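/* Illustrative aside (not part of GCC): the add-based form chosen above when
   COUNT * add-cost is no larger than the constant-shift cost.  Each addition
   of a value to itself doubles it, so COUNT additions implement a left shift
   by COUNT.  Hypothetical name:  */
static unsigned int
sketch_shift_left_by_adds (unsigned int x, int count)
{
  while (count-- > 0)
    x += x;          /* x = x * 2, i.e. x << 1 */
  return x;
}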
22566 void
22567 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22569 rtx (*gen_ashl3)(rtx, rtx, rtx);
22570 rtx (*gen_shld)(rtx, rtx, rtx);
22571 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22573 rtx low[2], high[2];
22574 int count;
22576 if (CONST_INT_P (operands[2]))
22578 split_double_mode (mode, operands, 2, low, high);
22579 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22581 if (count >= half_width)
22583 emit_move_insn (high[0], low[1]);
22584 emit_move_insn (low[0], const0_rtx);
22586 if (count > half_width)
22587 ix86_expand_ashl_const (high[0], count - half_width, mode);
22589 else
22591 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22593 if (!rtx_equal_p (operands[0], operands[1]))
22594 emit_move_insn (operands[0], operands[1]);
22596 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22597 ix86_expand_ashl_const (low[0], count, mode);
22599 return;
22602 split_double_mode (mode, operands, 1, low, high);
22604 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22606 if (operands[1] == const1_rtx)
22608 /* Assuming we've chosen QImode-capable registers, 1 << N
22609 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22610 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22612 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22614 ix86_expand_clear (low[0]);
22615 ix86_expand_clear (high[0]);
22616 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22618 d = gen_lowpart (QImode, low[0]);
22619 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22620 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22621 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22623 d = gen_lowpart (QImode, high[0]);
22624 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22625 s = gen_rtx_NE (QImode, flags, const0_rtx);
22626 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22629 /* Otherwise, we can get the same results by manually performing
22630 a bit extract operation on bit 5/6, and then performing the two
22631 shifts. The two methods of getting 0/1 into low/high are exactly
22632 the same size. Avoiding the shift in the bit extract case helps
22633 pentium4 a bit; no one else seems to care much either way. */
22634 else
22636 enum machine_mode half_mode;
22637 rtx (*gen_lshr3)(rtx, rtx, rtx);
22638 rtx (*gen_and3)(rtx, rtx, rtx);
22639 rtx (*gen_xor3)(rtx, rtx, rtx);
22640 HOST_WIDE_INT bits;
22641 rtx x;
22643 if (mode == DImode)
22645 half_mode = SImode;
22646 gen_lshr3 = gen_lshrsi3;
22647 gen_and3 = gen_andsi3;
22648 gen_xor3 = gen_xorsi3;
22649 bits = 5;
22651 else
22653 half_mode = DImode;
22654 gen_lshr3 = gen_lshrdi3;
22655 gen_and3 = gen_anddi3;
22656 gen_xor3 = gen_xordi3;
22657 bits = 6;
22660 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22661 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22662 else
22663 x = gen_lowpart (half_mode, operands[2]);
22664 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22666 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22667 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22668 emit_move_insn (low[0], high[0]);
22669 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22672 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22673 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22674 return;
22677 if (operands[1] == constm1_rtx)
22679 /* For -1 << N, we can avoid the shld instruction, because we
22680 know that we're shifting 0...31/63 ones into a -1. */
22681 emit_move_insn (low[0], constm1_rtx);
22682 if (optimize_insn_for_size_p ())
22683 emit_move_insn (high[0], low[0]);
22684 else
22685 emit_move_insn (high[0], constm1_rtx);
22687 else
22689 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22691 if (!rtx_equal_p (operands[0], operands[1]))
22692 emit_move_insn (operands[0], operands[1]);
22694 split_double_mode (mode, operands, 1, low, high);
22695 emit_insn (gen_shld (high[0], low[0], operands[2]));
22698 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22700 if (TARGET_CMOVE && scratch)
22702 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22703 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22705 ix86_expand_clear (scratch);
22706 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22708 else
22710 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22711 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22713 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22717 void
22718 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22720 rtx (*gen_ashr3)(rtx, rtx, rtx)
22721 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22722 rtx (*gen_shrd)(rtx, rtx, rtx);
22723 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22725 rtx low[2], high[2];
22726 int count;
22728 if (CONST_INT_P (operands[2]))
22730 split_double_mode (mode, operands, 2, low, high);
22731 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22733 if (count == GET_MODE_BITSIZE (mode) - 1)
22735 emit_move_insn (high[0], high[1]);
22736 emit_insn (gen_ashr3 (high[0], high[0],
22737 GEN_INT (half_width - 1)));
22738 emit_move_insn (low[0], high[0]);
22741 else if (count >= half_width)
22743 emit_move_insn (low[0], high[1]);
22744 emit_move_insn (high[0], low[0]);
22745 emit_insn (gen_ashr3 (high[0], high[0],
22746 GEN_INT (half_width - 1)));
22748 if (count > half_width)
22749 emit_insn (gen_ashr3 (low[0], low[0],
22750 GEN_INT (count - half_width)));
22752 else
22754 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22756 if (!rtx_equal_p (operands[0], operands[1]))
22757 emit_move_insn (operands[0], operands[1]);
22759 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22760 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22763 else
22765 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22767 if (!rtx_equal_p (operands[0], operands[1]))
22768 emit_move_insn (operands[0], operands[1]);
22770 split_double_mode (mode, operands, 1, low, high);
22772 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22773 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22775 if (TARGET_CMOVE && scratch)
22777 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22778 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22780 emit_move_insn (scratch, high[0]);
22781 emit_insn (gen_ashr3 (scratch, scratch,
22782 GEN_INT (half_width - 1)));
22783 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22784 scratch));
22786 else
22788 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22789 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22791 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22796 void
22797 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22799 rtx (*gen_lshr3)(rtx, rtx, rtx)
22800 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22801 rtx (*gen_shrd)(rtx, rtx, rtx);
22802 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22804 rtx low[2], high[2];
22805 int count;
22807 if (CONST_INT_P (operands[2]))
22809 split_double_mode (mode, operands, 2, low, high);
22810 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22812 if (count >= half_width)
22814 emit_move_insn (low[0], high[1]);
22815 ix86_expand_clear (high[0]);
22817 if (count > half_width)
22818 emit_insn (gen_lshr3 (low[0], low[0],
22819 GEN_INT (count - half_width)));
22821 else
22823 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22825 if (!rtx_equal_p (operands[0], operands[1]))
22826 emit_move_insn (operands[0], operands[1]);
22828 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22829 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22832 else
22834 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22836 if (!rtx_equal_p (operands[0], operands[1]))
22837 emit_move_insn (operands[0], operands[1]);
22839 split_double_mode (mode, operands, 1, low, high);
22841 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22842 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22844 if (TARGET_CMOVE && scratch)
22846 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22847 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22849 ix86_expand_clear (scratch);
22850 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22851 scratch));
22853 else
22855 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22856 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22858 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
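/* Illustrative sketch (not part of the original code): the ix86_split_*
   expanders above implement the classic double-word shift scheme at the RTL
   level.  The hypothetical helper below shows the same idea in plain C for a
   64-bit logical left shift built from 32-bit halves.  */
static inline unsigned long long
sketch_dword_shl (unsigned int lo, unsigned int hi, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      /* The whole low word is shifted into the high word.  */
      hi = lo << (count - 32);
      lo = 0;
    }
  else if (count > 0)
    {
      /* shld-style step: the high word receives the bits shifted out of
         the low word.  */
      hi = (hi << count) | (lo >> (32 - count));
      lo <<= count;
    }
  return ((unsigned long long) hi << 32) | lo;
}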
22863 /* Predict just emitted jump instruction to be taken with probability PROB. */
22864 static void
22865 predict_jump (int prob)
22867 rtx insn = get_last_insn ();
22868 gcc_assert (JUMP_P (insn));
22869 add_int_reg_note (insn, REG_BR_PROB, prob);
22872 /* Helper function for the string operations below. Test whether VARIABLE
22873 is aligned to VALUE bytes. If true, jump to the label. */
22874 static rtx
22875 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22877 rtx label = gen_label_rtx ();
22878 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22879 if (GET_MODE (variable) == DImode)
22880 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22881 else
22882 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22883 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22884 1, label);
22885 if (epilogue)
22886 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22887 else
22888 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22889 return label;
22892 /* Adjust COUNTREG by VALUE. */
22893 static void
22894 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22896 rtx (*gen_add)(rtx, rtx, rtx)
22897 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22899 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22902 /* Zero extend possibly SImode EXP to Pmode register. */
22904 ix86_zero_extend_to_Pmode (rtx exp)
22906 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22909 /* Divide COUNTREG by SCALE. */
22910 static rtx
22911 scale_counter (rtx countreg, int scale)
22913 rtx sc;
22915 if (scale == 1)
22916 return countreg;
22917 if (CONST_INT_P (countreg))
22918 return GEN_INT (INTVAL (countreg) / scale);
22919 gcc_assert (REG_P (countreg));
22921 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22922 GEN_INT (exact_log2 (scale)),
22923 NULL, 1, OPTAB_DIRECT);
22924 return sc;
22927 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22928 DImode for constant loop counts. */
22930 static enum machine_mode
22931 counter_mode (rtx count_exp)
22933 if (GET_MODE (count_exp) != VOIDmode)
22934 return GET_MODE (count_exp);
22935 if (!CONST_INT_P (count_exp))
22936 return Pmode;
22937 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22938 return DImode;
22939 return SImode;
22942 /* Copy the address to a Pmode register. This is used for x32 to
22943 truncate DImode TLS address to a SImode register. */
22945 static rtx
22946 ix86_copy_addr_to_reg (rtx addr)
22948 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22949 return copy_addr_to_reg (addr);
22950 else
22952 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22953 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22957 /* When ISSETMEM is FALSE, output a simple loop to move the memory pointed to
22958 by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
22959 size is COUNT, specified in bytes. When ISSETMEM is TRUE, output the
22960 equivalent loop to fill the memory with VALUE (assumed to be in MODE).
22962 The size is rounded down to a whole number of chunks moved at once.
22963 SRCMEM and DESTMEM provide MEM rtxes to supply proper aliasing info. */
22966 static void
22967 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22968 rtx destptr, rtx srcptr, rtx value,
22969 rtx count, enum machine_mode mode, int unroll,
22970 int expected_size, bool issetmem)
22972 rtx out_label, top_label, iter, tmp;
22973 enum machine_mode iter_mode = counter_mode (count);
22974 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22975 rtx piece_size = GEN_INT (piece_size_n);
22976 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22977 rtx size;
22978 int i;
22980 top_label = gen_label_rtx ();
22981 out_label = gen_label_rtx ();
22982 iter = gen_reg_rtx (iter_mode);
22984 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22985 NULL, 1, OPTAB_DIRECT);
22986 /* Those two should combine. */
22987 if (piece_size == const1_rtx)
22989 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22990 true, out_label);
22991 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22993 emit_move_insn (iter, const0_rtx);
22995 emit_label (top_label);
22997 tmp = convert_modes (Pmode, iter_mode, iter, true);
22999 /* This assert could be relaxed - in this case we'll need to compute
23000 the smallest power of two containing PIECE_SIZE_N and pass it to
23001 offset_address. */
23002 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23003 destmem = offset_address (destmem, tmp, piece_size_n);
23004 destmem = adjust_address (destmem, mode, 0);
23006 if (!issetmem)
23008 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23009 srcmem = adjust_address (srcmem, mode, 0);
23011 /* When unrolling for chips that reorder memory reads and writes,
23012 we can save registers by using a single temporary.
23013 Also, using 4 temporaries is overkill in 32-bit mode. */
23014 if (!TARGET_64BIT && 0)
23016 for (i = 0; i < unroll; i++)
23018 if (i)
23020 destmem =
23021 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23022 srcmem =
23023 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23025 emit_move_insn (destmem, srcmem);
23028 else
23030 rtx tmpreg[4];
23031 gcc_assert (unroll <= 4);
23032 for (i = 0; i < unroll; i++)
23034 tmpreg[i] = gen_reg_rtx (mode);
23035 if (i)
23037 srcmem =
23038 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23040 emit_move_insn (tmpreg[i], srcmem);
23042 for (i = 0; i < unroll; i++)
23044 if (i)
23046 destmem =
23047 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23049 emit_move_insn (destmem, tmpreg[i]);
23053 else
23054 for (i = 0; i < unroll; i++)
23056 if (i)
23057 destmem =
23058 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23059 emit_move_insn (destmem, value);
23062 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23063 true, OPTAB_LIB_WIDEN);
23064 if (tmp != iter)
23065 emit_move_insn (iter, tmp);
23067 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23068 true, top_label);
23069 if (expected_size != -1)
23071 expected_size /= GET_MODE_SIZE (mode) * unroll;
23072 if (expected_size == 0)
23073 predict_jump (0);
23074 else if (expected_size > REG_BR_PROB_BASE)
23075 predict_jump (REG_BR_PROB_BASE - 1);
23076 else
23077 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23079 else
23080 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23081 iter = ix86_zero_extend_to_Pmode (iter);
23082 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23083 true, OPTAB_LIB_WIDEN);
23084 if (tmp != destptr)
23085 emit_move_insn (destptr, tmp);
23086 if (!issetmem)
23088 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23089 true, OPTAB_LIB_WIDEN);
23090 if (tmp != srcptr)
23091 emit_move_insn (srcptr, tmp);
23093 emit_label (out_label);
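/* Illustrative sketch (not part of the original code): the RTL generated by
   expand_set_or_movmem_via_loop corresponds roughly to the C below, shown for
   word-sized chunks with an unroll factor of 4.  All names are hypothetical.  */
static inline void
sketch_unrolled_copy (unsigned char *dst, const unsigned char *src,
                      unsigned long count)
{
  const unsigned long w = sizeof (unsigned long);
  const unsigned long piece = w * 4;            /* chunk size * unroll factor */
  unsigned long size = count & ~(piece - 1);    /* round down to whole pieces */
  unsigned long iter;

  for (iter = 0; iter < size; iter += piece)
    {
      unsigned long t0, t1, t2, t3;
      /* Load all four chunks before storing any of them, mirroring the
         multi-temporary path above that helps chips reordering memory ops.  */
      __builtin_memcpy (&t0, src + iter + 0 * w, w);
      __builtin_memcpy (&t1, src + iter + 1 * w, w);
      __builtin_memcpy (&t2, src + iter + 2 * w, w);
      __builtin_memcpy (&t3, src + iter + 3 * w, w);
      __builtin_memcpy (dst + iter + 0 * w, &t0, w);
      __builtin_memcpy (dst + iter + 1 * w, &t1, w);
      __builtin_memcpy (dst + iter + 2 * w, &t2, w);
      __builtin_memcpy (dst + iter + 3 * w, &t3, w);
    }
  /* The remaining count % piece bytes are left to the epilogue.  */
}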
23096 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23097 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23098 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23099 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23100 ORIG_VALUE is the original value passed to memset to fill the memory with.
23101 Other arguments have same meaning as for previous function. */
23103 static void
23104 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23105 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23106 rtx count,
23107 enum machine_mode mode, bool issetmem)
23109 rtx destexp;
23110 rtx srcexp;
23111 rtx countreg;
23112 HOST_WIDE_INT rounded_count;
23114 /* If possible, it is shorter to use rep movs.
23115 TODO: Maybe it is better to move this logic to decide_alg. */
23116 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23117 && (!issetmem || orig_value == const0_rtx))
23118 mode = SImode;
23120 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23121 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23123 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23124 GET_MODE_SIZE (mode)));
23125 if (mode != QImode)
23127 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23128 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23129 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23131 else
23132 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23133 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23135 rounded_count = (INTVAL (count)
23136 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23137 destmem = shallow_copy_rtx (destmem);
23138 set_mem_size (destmem, rounded_count);
23140 else if (MEM_SIZE_KNOWN_P (destmem))
23141 clear_mem_size (destmem);
23143 if (issetmem)
23145 value = force_reg (mode, gen_lowpart (mode, value));
23146 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23148 else
23150 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23151 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23152 if (mode != QImode)
23154 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23155 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23156 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23158 else
23159 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23160 if (CONST_INT_P (count))
23162 rounded_count = (INTVAL (count)
23163 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23164 srcmem = shallow_copy_rtx (srcmem);
23165 set_mem_size (srcmem, rounded_count);
23167 else
23169 if (MEM_SIZE_KNOWN_P (srcmem))
23170 clear_mem_size (srcmem);
23172 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23173 destexp, srcexp));
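/* Illustrative sketch (not part of the original code): at the machine level
   the insns emitted above boil down to the rep-prefixed string instructions.
   The hypothetical GNU C helpers below show the register roles the expander
   sets up (rdi/rsi/rcx, plus rax for stos).  */
static inline void
sketch_rep_movsb (void *dst, const void *src, unsigned long count)
{
  __asm__ __volatile__ ("rep movsb"
                        : "+D" (dst), "+S" (src), "+c" (count)
                        : : "memory");
}

static inline void
sketch_rep_stosb (void *dst, unsigned char value, unsigned long count)
{
  __asm__ __volatile__ ("rep stosb"
                        : "+D" (dst), "+c" (count)
                        : "a" (value)
                        : "memory");
}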
23177 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23178 DESTMEM.
23179 SRCMEM is passed by pointer so it can be updated on return.
23180 The return value is the updated DESTMEM. */
23181 static rtx
23182 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23183 HOST_WIDE_INT size_to_move)
23185 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23186 enum insn_code code;
23187 enum machine_mode move_mode;
23188 int piece_size, i;
23190 /* Find the widest mode in which we could perform moves.
23191 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23192 it until a move of that size is supported. */
23193 piece_size = 1 << floor_log2 (size_to_move);
23194 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23195 code = optab_handler (mov_optab, move_mode);
23196 while (code == CODE_FOR_nothing && piece_size > 1)
23198 piece_size >>= 1;
23199 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23200 code = optab_handler (mov_optab, move_mode);
23203 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23204 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23205 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23207 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23208 move_mode = mode_for_vector (word_mode, nunits);
23209 code = optab_handler (mov_optab, move_mode);
23210 if (code == CODE_FOR_nothing)
23212 move_mode = word_mode;
23213 piece_size = GET_MODE_SIZE (move_mode);
23214 code = optab_handler (mov_optab, move_mode);
23217 gcc_assert (code != CODE_FOR_nothing);
23219 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23220 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23222 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23223 gcc_assert (size_to_move % piece_size == 0);
23224 adjust = GEN_INT (piece_size);
23225 for (i = 0; i < size_to_move; i += piece_size)
23227 /* We move from memory to memory, so we'll need to do it via
23228 a temporary register. */
23229 tempreg = gen_reg_rtx (move_mode);
23230 emit_insn (GEN_FCN (code) (tempreg, src));
23231 emit_insn (GEN_FCN (code) (dst, tempreg));
23233 emit_move_insn (destptr,
23234 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23235 emit_move_insn (srcptr,
23236 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23238 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23239 piece_size);
23240 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23241 piece_size);
23244 /* Update DST and SRC rtx. */
23245 *srcmem = src;
23246 return dst;
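/* Illustrative sketch (not part of the original code): emit_memmov above
   selects the widest supported power-of-two piece and copies SIZE_TO_MOVE as
   a sequence of such pieces through a temporary.  A hypothetical C
   equivalent, assuming an 8-byte maximum piece: */
static inline void
sketch_piecewise_copy (unsigned char *dst, const unsigned char *src,
                       unsigned long size_to_move)
{
  unsigned long piece = 8;      /* assumed widest supported move */
  unsigned long i;

  while (piece > size_to_move)
    piece >>= 1;                /* halve until a piece fits */
  /* As in emit_memmov, SIZE_TO_MOVE must be a multiple of PIECE.  */
  for (i = 0; i < size_to_move; i += piece)
    __builtin_memcpy (dst + i, src + i, piece);
}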
23249 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23250 static void
23251 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23252 rtx destptr, rtx srcptr, rtx count, int max_size)
23254 rtx src, dest;
23255 if (CONST_INT_P (count))
23257 HOST_WIDE_INT countval = INTVAL (count);
23258 HOST_WIDE_INT epilogue_size = countval % max_size;
23259 int i;
23261 /* For now MAX_SIZE should be a power of 2. This assert could be
23262 relaxed, but it'll require a bit more complicated epilogue
23263 expanding. */
23264 gcc_assert ((max_size & (max_size - 1)) == 0);
23265 for (i = max_size; i >= 1; i >>= 1)
23267 if (epilogue_size & i)
23268 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23270 return;
23272 if (max_size > 8)
23274 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23275 count, 1, OPTAB_DIRECT);
23276 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23277 count, QImode, 1, 4, false);
23278 return;
23281 /* When there are stringops, we can cheaply increase dest and src pointers.
23282 Otherwise we save code size by maintaining an offset (zero is readily
23283 available from the preceding rep operation) and using x86 addressing modes.
23285 if (TARGET_SINGLE_STRINGOP)
23287 if (max_size > 4)
23289 rtx label = ix86_expand_aligntest (count, 4, true);
23290 src = change_address (srcmem, SImode, srcptr);
23291 dest = change_address (destmem, SImode, destptr);
23292 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23293 emit_label (label);
23294 LABEL_NUSES (label) = 1;
23296 if (max_size > 2)
23298 rtx label = ix86_expand_aligntest (count, 2, true);
23299 src = change_address (srcmem, HImode, srcptr);
23300 dest = change_address (destmem, HImode, destptr);
23301 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23302 emit_label (label);
23303 LABEL_NUSES (label) = 1;
23305 if (max_size > 1)
23307 rtx label = ix86_expand_aligntest (count, 1, true);
23308 src = change_address (srcmem, QImode, srcptr);
23309 dest = change_address (destmem, QImode, destptr);
23310 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23311 emit_label (label);
23312 LABEL_NUSES (label) = 1;
23315 else
23317 rtx offset = force_reg (Pmode, const0_rtx);
23318 rtx tmp;
23320 if (max_size > 4)
23322 rtx label = ix86_expand_aligntest (count, 4, true);
23323 src = change_address (srcmem, SImode, srcptr);
23324 dest = change_address (destmem, SImode, destptr);
23325 emit_move_insn (dest, src);
23326 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23327 true, OPTAB_LIB_WIDEN);
23328 if (tmp != offset)
23329 emit_move_insn (offset, tmp);
23330 emit_label (label);
23331 LABEL_NUSES (label) = 1;
23333 if (max_size > 2)
23335 rtx label = ix86_expand_aligntest (count, 2, true);
23336 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23337 src = change_address (srcmem, HImode, tmp);
23338 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23339 dest = change_address (destmem, HImode, tmp);
23340 emit_move_insn (dest, src);
23341 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23342 true, OPTAB_LIB_WIDEN);
23343 if (tmp != offset)
23344 emit_move_insn (offset, tmp);
23345 emit_label (label);
23346 LABEL_NUSES (label) = 1;
23348 if (max_size > 1)
23350 rtx label = ix86_expand_aligntest (count, 1, true);
23351 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23352 src = change_address (srcmem, QImode, tmp);
23353 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23354 dest = change_address (destmem, QImode, tmp);
23355 emit_move_insn (dest, src);
23356 emit_label (label);
23357 LABEL_NUSES (label) = 1;
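/* Illustrative sketch (not part of the original code): for a constant
   remainder, the epilogue above decomposes it into power-of-two moves
   selected by its set bits, e.g. a 7-byte tail becomes a 4-, a 2- and a
   1-byte move.  A hypothetical C rendering: */
static inline void
sketch_copy_tail (unsigned char *dst, const unsigned char *src,
                  unsigned long remainder)      /* remainder < 16 assumed */
{
  unsigned long off = 0;
  int bit;

  for (bit = 3; bit >= 0; bit--)        /* pieces of 8, 4, 2 and 1 bytes */
    {
      unsigned long piece = 1UL << bit;
      if (remainder & piece)
        {
          __builtin_memcpy (dst + off, src + off, piece);
          off += piece;
        }
    }
}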
23362 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23363 with value PROMOTED_VAL.
23364 The return value is the updated DESTMEM. */
23366 static rtx
23367 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23368 HOST_WIDE_INT size_to_move)
23370 rtx dst = destmem, adjust;
23371 enum insn_code code;
23372 enum machine_mode move_mode;
23373 int piece_size, i;
23375 /* Find the widest mode in which we could perform moves.
23376 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23377 it until a move of that size is supported. */
23378 move_mode = GET_MODE (promoted_val);
23379 if (move_mode == VOIDmode)
23380 move_mode = QImode;
23381 if (size_to_move < GET_MODE_SIZE (move_mode))
23383 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23384 promoted_val = gen_lowpart (move_mode, promoted_val);
23386 piece_size = GET_MODE_SIZE (move_mode);
23387 code = optab_handler (mov_optab, move_mode);
23388 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23390 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23392 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23393 gcc_assert (size_to_move % piece_size == 0);
23394 adjust = GEN_INT (piece_size);
23395 for (i = 0; i < size_to_move; i += piece_size)
23397 if (piece_size <= GET_MODE_SIZE (word_mode))
23399 emit_insn (gen_strset (destptr, dst, promoted_val));
23400 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23401 piece_size);
23402 continue;
23405 emit_insn (GEN_FCN (code) (dst, promoted_val));
23407 emit_move_insn (destptr,
23408 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23410 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23411 piece_size);
23414 /* Update DST rtx. */
23415 return dst;
23417 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23418 static void
23419 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23420 rtx count, int max_size)
23422 count =
23423 expand_simple_binop (counter_mode (count), AND, count,
23424 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23425 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23426 gen_lowpart (QImode, value), count, QImode,
23427 1, max_size / 2, true);
23430 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23431 static void
23432 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23433 rtx count, int max_size)
23435 rtx dest;
23437 if (CONST_INT_P (count))
23439 HOST_WIDE_INT countval = INTVAL (count);
23440 HOST_WIDE_INT epilogue_size = countval % max_size;
23441 int i;
23443 /* For now MAX_SIZE should be a power of 2. This assert could be
23444 relaxed, but it'll require a bit more complicated epilogue
23445 expanding. */
23446 gcc_assert ((max_size & (max_size - 1)) == 0);
23447 for (i = max_size; i >= 1; i >>= 1)
23449 if (epilogue_size & i)
23451 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23452 destmem = emit_memset (destmem, destptr, vec_value, i);
23453 else
23454 destmem = emit_memset (destmem, destptr, value, i);
23457 return;
23459 if (max_size > 32)
23461 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23462 return;
23464 if (max_size > 16)
23466 rtx label = ix86_expand_aligntest (count, 16, true);
23467 if (TARGET_64BIT)
23469 dest = change_address (destmem, DImode, destptr);
23470 emit_insn (gen_strset (destptr, dest, value));
23471 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23472 emit_insn (gen_strset (destptr, dest, value));
23474 else
23476 dest = change_address (destmem, SImode, destptr);
23477 emit_insn (gen_strset (destptr, dest, value));
23478 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23479 emit_insn (gen_strset (destptr, dest, value));
23480 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23481 emit_insn (gen_strset (destptr, dest, value));
23482 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23483 emit_insn (gen_strset (destptr, dest, value));
23485 emit_label (label);
23486 LABEL_NUSES (label) = 1;
23488 if (max_size > 8)
23490 rtx label = ix86_expand_aligntest (count, 8, true);
23491 if (TARGET_64BIT)
23493 dest = change_address (destmem, DImode, destptr);
23494 emit_insn (gen_strset (destptr, dest, value));
23496 else
23498 dest = change_address (destmem, SImode, destptr);
23499 emit_insn (gen_strset (destptr, dest, value));
23500 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23501 emit_insn (gen_strset (destptr, dest, value));
23503 emit_label (label);
23504 LABEL_NUSES (label) = 1;
23506 if (max_size > 4)
23508 rtx label = ix86_expand_aligntest (count, 4, true);
23509 dest = change_address (destmem, SImode, destptr);
23510 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23511 emit_label (label);
23512 LABEL_NUSES (label) = 1;
23514 if (max_size > 2)
23516 rtx label = ix86_expand_aligntest (count, 2, true);
23517 dest = change_address (destmem, HImode, destptr);
23518 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23519 emit_label (label);
23520 LABEL_NUSES (label) = 1;
23522 if (max_size > 1)
23524 rtx label = ix86_expand_aligntest (count, 1, true);
23525 dest = change_address (destmem, QImode, destptr);
23526 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23527 emit_label (label);
23528 LABEL_NUSES (label) = 1;
23532 /* Depending on ISSETMEM, copy enough bytes from SRCMEM to DESTMEM (or store
23533 enough bytes into DESTMEM) to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23534 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23535 ignored.
23536 Return value is updated DESTMEM. */
23537 static rtx
23538 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23539 rtx destptr, rtx srcptr, rtx value,
23540 rtx vec_value, rtx count, int align,
23541 int desired_alignment, bool issetmem)
23543 int i;
23544 for (i = 1; i < desired_alignment; i <<= 1)
23546 if (align <= i)
23548 rtx label = ix86_expand_aligntest (destptr, i, false);
23549 if (issetmem)
23551 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23552 destmem = emit_memset (destmem, destptr, vec_value, i);
23553 else
23554 destmem = emit_memset (destmem, destptr, value, i);
23556 else
23557 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23558 ix86_adjust_counter (count, i);
23559 emit_label (label);
23560 LABEL_NUSES (label) = 1;
23561 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23564 return destmem;
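/* Illustrative sketch (not part of the original code): the prologue emitted
   above raises the destination alignment by conditionally moving 1, 2, 4, ...
   bytes, each move guarded by a runtime test of the corresponding destination
   address bit.  A hypothetical C rendering for the memcpy case: */
static inline unsigned long
sketch_align_dest (unsigned char **dstp, const unsigned char **srcp,
                   unsigned long count, unsigned long desired_align)
{
  unsigned long i;

  for (i = 1; i < desired_align; i <<= 1)
    if (((__SIZE_TYPE__) *dstp) & i)
      {
        __builtin_memcpy (*dstp, *srcp, i);
        *dstp += i;
        *srcp += i;
        count -= i;
      }
  return count;         /* bytes left for the main loop */
}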
23567 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23568 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23569 and jump to DONE_LABEL. */
23570 static void
23571 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23572 rtx destptr, rtx srcptr,
23573 rtx value, rtx vec_value,
23574 rtx count, int size,
23575 rtx done_label, bool issetmem)
23577 rtx label = ix86_expand_aligntest (count, size, false);
23578 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23579 rtx modesize;
23580 int n;
23582 /* If we do not have vector value to copy, we must reduce size. */
23583 if (issetmem)
23585 if (!vec_value)
23587 if (GET_MODE (value) == VOIDmode && size > 8)
23588 mode = Pmode;
23589 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23590 mode = GET_MODE (value);
23592 else
23593 mode = GET_MODE (vec_value), value = vec_value;
23595 else
23597 /* Choose appropriate vector mode. */
23598 if (size >= 32)
23599 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23600 else if (size >= 16)
23601 mode = TARGET_SSE ? V16QImode : DImode;
23602 srcmem = change_address (srcmem, mode, srcptr);
23604 destmem = change_address (destmem, mode, destptr);
23605 modesize = GEN_INT (GET_MODE_SIZE (mode));
23606 gcc_assert (GET_MODE_SIZE (mode) <= size);
23607 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23609 if (issetmem)
23610 emit_move_insn (destmem, gen_lowpart (mode, value));
23611 else
23613 emit_move_insn (destmem, srcmem);
23614 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23616 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23619 destmem = offset_address (destmem, count, 1);
23620 destmem = offset_address (destmem, GEN_INT (-2 * size),
23621 GET_MODE_SIZE (mode));
23622 if (!issetmem)
23624 srcmem = offset_address (srcmem, count, 1);
23625 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23626 GET_MODE_SIZE (mode));
23628 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23630 if (issetmem)
23631 emit_move_insn (destmem, gen_lowpart (mode, value));
23632 else
23634 emit_move_insn (destmem, srcmem);
23635 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23637 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23639 emit_jump_insn (gen_jump (done_label));
23640 emit_barrier ();
23642 emit_label (label);
23643 LABEL_NUSES (label) = 1;
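/* Illustrative sketch (not part of the original code): the routine above
   covers a block of SIZE..2*SIZE-1 bytes with one move at the start and one
   possibly overlapping move that ends at the last byte, so no loop is needed.
   A hypothetical C rendering for SIZE == 8: */
static inline void
sketch_copy_8_to_15 (unsigned char *dst, const unsigned char *src,
                     unsigned long count)       /* 8 <= count <= 15 */
{
  unsigned long long head, tail;

  __builtin_memcpy (&head, src, 8);                /* first 8 bytes */
  __builtin_memcpy (&tail, src + count - 8, 8);    /* last 8 bytes, may overlap */
  __builtin_memcpy (dst, &head, 8);
  __builtin_memcpy (dst + count - 8, &tail, 8);
}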
23646 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23647 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23648 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that we can
23649 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23650 DONE_LABEL is a label after the whole copying sequence. The label is created
23651 on demand if *DONE_LABEL is NULL.
23652 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for new
23653 bounds after the initial copies.
23655 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23656 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23657 we will dispatch to a library call for large blocks.
23659 In pseudocode we do:
23661 if (COUNT < SIZE)
23663 Assume that SIZE is 4. Bigger sizes are handled analogously
23664 if (COUNT & 4)
23666 copy 4 bytes from SRCPTR to DESTPTR
23667 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23668 goto done_label
23670 if (!COUNT)
23671 goto done_label;
23672 copy 1 byte from SRCPTR to DESTPTR
23673 if (COUNT & 2)
23675 copy 2 bytes from SRCPTR to DESTPTR
23676 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23679 else
23681 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23682 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23684 OLD_DESTPTR = DESTPTR;
23685 Align DESTPTR up to DESIRED_ALIGN
23686 SRCPTR += DESTPTR - OLD_DESTPTR
23687 COUNT -= DESTPTR - OLD_DESTPTR
23688 if (DYNAMIC_CHECK)
23689 Round COUNT down to multiple of SIZE
23690 << optional caller supplied zero size guard is here >>
23691 << optional caller supplied dynamic check is here >>
23692 << caller supplied main copy loop is here >>
23694 done_label:
23696 static void
23697 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23698 rtx *destptr, rtx *srcptr,
23699 enum machine_mode mode,
23700 rtx value, rtx vec_value,
23701 rtx *count,
23702 rtx *done_label,
23703 int size,
23704 int desired_align,
23705 int align,
23706 unsigned HOST_WIDE_INT *min_size,
23707 bool dynamic_check,
23708 bool issetmem)
23710 rtx loop_label = NULL, label;
23711 int n;
23712 rtx modesize;
23713 int prolog_size = 0;
23714 rtx mode_value;
23716 /* Choose the proper value to copy. */
23717 if (issetmem && VECTOR_MODE_P (mode))
23718 mode_value = vec_value;
23719 else
23720 mode_value = value;
23721 gcc_assert (GET_MODE_SIZE (mode) <= size);
23723 /* See if block is big or small, handle small blocks. */
23724 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23726 int size2 = size;
23727 loop_label = gen_label_rtx ();
23729 if (!*done_label)
23730 *done_label = gen_label_rtx ();
23732 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23733 1, loop_label);
23734 size2 >>= 1;
23736 /* Handle sizes > 3. */
23737 for (;size2 > 2; size2 >>= 1)
23738 expand_small_movmem_or_setmem (destmem, srcmem,
23739 *destptr, *srcptr,
23740 value, vec_value,
23741 *count,
23742 size2, *done_label, issetmem);
23743 /* Nothing to copy? Jump to DONE_LABEL if so */
23744 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23745 1, *done_label);
23747 /* Do a byte copy. */
23748 destmem = change_address (destmem, QImode, *destptr);
23749 if (issetmem)
23750 emit_move_insn (destmem, gen_lowpart (QImode, value));
23751 else
23753 srcmem = change_address (srcmem, QImode, *srcptr);
23754 emit_move_insn (destmem, srcmem);
23757 /* Handle sizes 2 and 3. */
23758 label = ix86_expand_aligntest (*count, 2, false);
23759 destmem = change_address (destmem, HImode, *destptr);
23760 destmem = offset_address (destmem, *count, 1);
23761 destmem = offset_address (destmem, GEN_INT (-2), 2);
23762 if (issetmem)
23763 emit_move_insn (destmem, gen_lowpart (HImode, value));
23764 else
23766 srcmem = change_address (srcmem, HImode, *srcptr);
23767 srcmem = offset_address (srcmem, *count, 1);
23768 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23769 emit_move_insn (destmem, srcmem);
23772 emit_label (label);
23773 LABEL_NUSES (label) = 1;
23774 emit_jump_insn (gen_jump (*done_label));
23775 emit_barrier ();
23777 else
23778 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23779 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23781 /* Start memcpy for COUNT >= SIZE. */
23782 if (loop_label)
23784 emit_label (loop_label);
23785 LABEL_NUSES (loop_label) = 1;
23788 /* Copy first desired_align bytes. */
23789 if (!issetmem)
23790 srcmem = change_address (srcmem, mode, *srcptr);
23791 destmem = change_address (destmem, mode, *destptr);
23792 modesize = GEN_INT (GET_MODE_SIZE (mode));
23793 for (n = 0; prolog_size < desired_align - align; n++)
23795 if (issetmem)
23796 emit_move_insn (destmem, mode_value);
23797 else
23799 emit_move_insn (destmem, srcmem);
23800 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23802 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23803 prolog_size += GET_MODE_SIZE (mode);
23807 /* Copy last SIZE bytes. */
23808 destmem = offset_address (destmem, *count, 1);
23809 destmem = offset_address (destmem,
23810 GEN_INT (-size - prolog_size),
23812 if (issetmem)
23813 emit_move_insn (destmem, mode_value);
23814 else
23816 srcmem = offset_address (srcmem, *count, 1);
23817 srcmem = offset_address (srcmem,
23818 GEN_INT (-size - prolog_size),
23820 emit_move_insn (destmem, srcmem);
23822 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23824 destmem = offset_address (destmem, modesize, 1);
23825 if (issetmem)
23826 emit_move_insn (destmem, mode_value);
23827 else
23829 srcmem = offset_address (srcmem, modesize, 1);
23830 emit_move_insn (destmem, srcmem);
23834 /* Align destination. */
23835 if (desired_align > 1 && desired_align > align)
23837 rtx saveddest = *destptr;
23839 gcc_assert (desired_align <= size);
23840 /* Align destptr up, place it to new register. */
23841 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23842 GEN_INT (prolog_size),
23843 NULL_RTX, 1, OPTAB_DIRECT);
23844 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23845 GEN_INT (-desired_align),
23846 *destptr, 1, OPTAB_DIRECT);
23847 /* See how many bytes we skipped. */
23848 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23849 *destptr,
23850 saveddest, 1, OPTAB_DIRECT);
23851 /* Adjust srcptr and count. */
23852 if (!issetmem)
23853 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23854 *srcptr, 1, OPTAB_DIRECT);
23855 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23856 saveddest, *count, 1, OPTAB_DIRECT);
23857 /* We copied at most size + prolog_size. */
23858 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23859 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23860 else
23861 *min_size = 0;
23863 /* Our loops always round down the block size, but for dispatch to a library
23864 call we need the precise value. */
23865 if (dynamic_check)
23866 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23867 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23869 else
23871 gcc_assert (prolog_size == 0);
23872 /* Decrease count, so we won't end up copying last word twice. */
23873 if (!CONST_INT_P (*count))
23874 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23875 constm1_rtx, *count, 1, OPTAB_DIRECT);
23876 else
23877 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23878 if (*min_size)
23879 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
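/* Illustrative sketch (not part of the original code): a hypothetical plain-C
   rendering of the pseudocode above for SIZE == 4, with the caller-supplied
   main loop shown inline.  It is only meant to make the control flow
   concrete; the emitted RTL differs in detail.  */
static inline void
sketch_misaligned_memcpy (unsigned char *dst, const unsigned char *src,
                          unsigned long count)
{
  if (count < 4)
    {
      if (count == 0)
        return;
      dst[0] = src[0];                  /* the single-byte case */
      if (count & 2)
        {
          /* 2 or 3 bytes: head and tail moves may overlap.  */
          __builtin_memcpy (dst, src, 2);
          __builtin_memcpy (dst + count - 2, src + count - 2, 2);
        }
      return;
    }

  /* Copy the first and the (possibly overlapping) last 4 bytes, then align
     DST and let the main loop handle whole 4-byte chunks.  */
  __builtin_memcpy (dst, src, 4);
  __builtin_memcpy (dst + count - 4, src + count - 4, 4);
  {
    unsigned long skip = (4 - ((__SIZE_TYPE__) dst & 3)) & 3;

    dst += skip, src += skip, count -= skip;
    count &= ~(unsigned long) 3;        /* round down; tail is already copied */
    while (count)
      {
        __builtin_memcpy (dst, src, 4);
        dst += 4, src += 4, count -= 4;
      }
  }
}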
23884 /* This function is like the previous one, except here we know how many bytes
23885 need to be copied. That allows us to update alignment not only of DST, which
23886 is returned, but also of SRC, which is passed as a pointer for that
23887 reason. */
23888 static rtx
23889 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23890 rtx srcreg, rtx value, rtx vec_value,
23891 int desired_align, int align_bytes,
23892 bool issetmem)
23894 rtx src = NULL;
23895 rtx orig_dst = dst;
23896 rtx orig_src = NULL;
23897 int piece_size = 1;
23898 int copied_bytes = 0;
23900 if (!issetmem)
23902 gcc_assert (srcp != NULL);
23903 src = *srcp;
23904 orig_src = src;
23907 for (piece_size = 1;
23908 piece_size <= desired_align && copied_bytes < align_bytes;
23909 piece_size <<= 1)
23911 if (align_bytes & piece_size)
23913 if (issetmem)
23915 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23916 dst = emit_memset (dst, destreg, vec_value, piece_size);
23917 else
23918 dst = emit_memset (dst, destreg, value, piece_size);
23920 else
23921 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23922 copied_bytes += piece_size;
23925 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23926 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23927 if (MEM_SIZE_KNOWN_P (orig_dst))
23928 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23930 if (!issetmem)
23932 int src_align_bytes = get_mem_align_offset (src, desired_align
23933 * BITS_PER_UNIT);
23934 if (src_align_bytes >= 0)
23935 src_align_bytes = desired_align - src_align_bytes;
23936 if (src_align_bytes >= 0)
23938 unsigned int src_align;
23939 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23941 if ((src_align_bytes & (src_align - 1))
23942 == (align_bytes & (src_align - 1)))
23943 break;
23945 if (src_align > (unsigned int) desired_align)
23946 src_align = desired_align;
23947 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23948 set_mem_align (src, src_align * BITS_PER_UNIT);
23950 if (MEM_SIZE_KNOWN_P (orig_src))
23951 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23952 *srcp = src;
23955 return dst;
23958 /* Return true if ALG can be used in current context.
23959 Assume we expand memset if MEMSET is true. */
23960 static bool
23961 alg_usable_p (enum stringop_alg alg, bool memset)
23963 if (alg == no_stringop)
23964 return false;
23965 if (alg == vector_loop)
23966 return TARGET_SSE || TARGET_AVX;
23967 /* Algorithms using the rep prefix want at least edi and ecx;
23968 additionally, memset wants eax and memcpy wants esi. Don't
23969 consider such algorithms if the user has appropriated those
23970 registers for their own purposes. */
23971 if (alg == rep_prefix_1_byte
23972 || alg == rep_prefix_4_byte
23973 || alg == rep_prefix_8_byte)
23974 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23975 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23976 return true;
23979 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23980 static enum stringop_alg
23981 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23982 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23983 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23985 const struct stringop_algs * algs;
23986 bool optimize_for_speed;
23987 int max = 0;
23988 const struct processor_costs *cost;
23989 int i;
23990 bool any_alg_usable_p = false;
23992 *noalign = false;
23993 *dynamic_check = -1;
23995 /* Even if the string operation call is cold, we still might spend a lot
23996 of time processing large blocks. */
23997 if (optimize_function_for_size_p (cfun)
23998 || (optimize_insn_for_size_p ()
23999 && (max_size < 256
24000 || (expected_size != -1 && expected_size < 256))))
24001 optimize_for_speed = false;
24002 else
24003 optimize_for_speed = true;
24005 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24006 if (memset)
24007 algs = &cost->memset[TARGET_64BIT != 0];
24008 else
24009 algs = &cost->memcpy[TARGET_64BIT != 0];
24011 /* Find the maximal size for which a non-libcall inline algorithm is usable. */
24012 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24014 enum stringop_alg candidate = algs->size[i].alg;
24015 bool usable = alg_usable_p (candidate, memset);
24016 any_alg_usable_p |= usable;
24018 if (candidate != libcall && candidate && usable)
24019 max = algs->size[i].max;
24022 /* If expected size is not known but max size is small enough
24023 so that the inline version is a win, set the expected size into
24024 that range. */
24025 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24026 && expected_size == -1)
24027 expected_size = min_size / 2 + max_size / 2;
24029 /* If user specified the algorithm, honor it if possible. */
24030 if (ix86_stringop_alg != no_stringop
24031 && alg_usable_p (ix86_stringop_alg, memset))
24032 return ix86_stringop_alg;
24033 /* rep; movq or rep; movl is the smallest variant. */
24034 else if (!optimize_for_speed)
24036 *noalign = true;
24037 if (!count || (count & 3) || (memset && !zero_memset))
24038 return alg_usable_p (rep_prefix_1_byte, memset)
24039 ? rep_prefix_1_byte : loop_1_byte;
24040 else
24041 return alg_usable_p (rep_prefix_4_byte, memset)
24042 ? rep_prefix_4_byte : loop;
24044 /* Very tiny blocks are best handled via the loop; REP is expensive to
24045 set up. */
24046 else if (expected_size != -1 && expected_size < 4)
24047 return loop_1_byte;
24048 else if (expected_size != -1)
24050 enum stringop_alg alg = libcall;
24051 bool alg_noalign = false;
24052 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24054 /* We get here if the algorithms that were not libcall-based
24055 were rep-prefix based and we are unable to use rep prefixes
24056 based on global register usage. Break out of the loop and
24057 use the heuristic below. */
24058 if (algs->size[i].max == 0)
24059 break;
24060 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24062 enum stringop_alg candidate = algs->size[i].alg;
24064 if (candidate != libcall && alg_usable_p (candidate, memset))
24066 alg = candidate;
24067 alg_noalign = algs->size[i].noalign;
24069 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24070 last non-libcall inline algorithm. */
24071 if (TARGET_INLINE_ALL_STRINGOPS)
24073 /* When the current size is best to be copied by a libcall,
24074 but we are still forced to inline, run the heuristic below
24075 that will pick code for medium sized blocks. */
24076 if (alg != libcall)
24078 *noalign = alg_noalign;
24079 return alg;
24081 break;
24083 else if (alg_usable_p (candidate, memset))
24085 *noalign = algs->size[i].noalign;
24086 return candidate;
24091 /* When asked to inline the call anyway, try to pick a meaningful choice.
24092 We look for the maximal size of a block that is faster to copy by hand and
24093 take blocks of at most that size, guessing that the average size will
24094 be roughly half of the block.
24096 If this turns out to be bad, we might simply specify the preferred
24097 choice in ix86_costs. */
24098 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24099 && (algs->unknown_size == libcall
24100 || !alg_usable_p (algs->unknown_size, memset)))
24102 enum stringop_alg alg;
24104 /* If there aren't any usable algorithms, then recursing on
24105 smaller sizes isn't going to find anything. Just return the
24106 simple byte-at-a-time copy loop. */
24107 if (!any_alg_usable_p)
24109 /* Pick something reasonable. */
24110 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24111 *dynamic_check = 128;
24112 return loop_1_byte;
24114 if (max <= 0)
24115 max = 4096;
24116 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24117 zero_memset, dynamic_check, noalign);
24118 gcc_assert (*dynamic_check == -1);
24119 gcc_assert (alg != libcall);
24120 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24121 *dynamic_check = max;
24122 return alg;
24124 return (alg_usable_p (algs->unknown_size, memset)
24125 ? algs->unknown_size : libcall);
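/* Illustrative sketch (not part of the original code): the stringop_algs cost
   tables consulted above are essentially (max size, algorithm) pairs, and
   selection walks them until the expected size fits.  A hypothetical,
   heavily simplified rendering: */
struct sketch_alg_entry { long max_bytes; int alg; };   /* -1 means any size */

static inline int
sketch_pick_alg (const struct sketch_alg_entry *table, int n,
                 long expected_size, int libcall_alg)
{
  int i;

  for (i = 0; i < n; i++)
    if (table[i].max_bytes == -1 || expected_size <= table[i].max_bytes)
      return table[i].alg;
  return libcall_alg;           /* nothing matched: call the library */
}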
24128 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24129 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24130 static int
24131 decide_alignment (int align,
24132 enum stringop_alg alg,
24133 int expected_size,
24134 enum machine_mode move_mode)
24136 int desired_align = 0;
24138 gcc_assert (alg != no_stringop);
24140 if (alg == libcall)
24141 return 0;
24142 if (move_mode == VOIDmode)
24143 return 0;
24145 desired_align = GET_MODE_SIZE (move_mode);
24146 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
24147 copying whole cachelines at once. */
24148 if (TARGET_PENTIUMPRO
24149 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24150 desired_align = 8;
24152 if (optimize_size)
24153 desired_align = 1;
24154 if (desired_align < align)
24155 desired_align = align;
24156 if (expected_size != -1 && expected_size < 4)
24157 desired_align = align;
24159 return desired_align;
24163 /* Helper function for memset. For a QImode value 0xXY produce
24164 0xXYXYXYXY of the width specified by MODE. This is essentially
24165 a * 0x10101010, but we can do slightly better than
24166 synth_mult by unwinding the sequence by hand on CPUs with
24167 slow multiply. */
24168 static rtx
24169 promote_duplicated_reg (enum machine_mode mode, rtx val)
24171 enum machine_mode valmode = GET_MODE (val);
24172 rtx tmp;
24173 int nops = mode == DImode ? 3 : 2;
24175 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24176 if (val == const0_rtx)
24177 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24178 if (CONST_INT_P (val))
24180 HOST_WIDE_INT v = INTVAL (val) & 255;
24182 v |= v << 8;
24183 v |= v << 16;
24184 if (mode == DImode)
24185 v |= (v << 16) << 16;
24186 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24189 if (valmode == VOIDmode)
24190 valmode = QImode;
24191 if (valmode != QImode)
24192 val = gen_lowpart (QImode, val);
24193 if (mode == QImode)
24194 return val;
24195 if (!TARGET_PARTIAL_REG_STALL)
24196 nops--;
24197 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24198 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24199 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24200 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24202 rtx reg = convert_modes (mode, QImode, val, true);
24203 tmp = promote_duplicated_reg (mode, const1_rtx);
24204 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24205 OPTAB_DIRECT);
24207 else
24209 rtx reg = convert_modes (mode, QImode, val, true);
24211 if (!TARGET_PARTIAL_REG_STALL)
24212 if (mode == SImode)
24213 emit_insn (gen_movsi_insv_1 (reg, reg));
24214 else
24215 emit_insn (gen_movdi_insv_1 (reg, reg));
24216 else
24218 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24219 NULL, 1, OPTAB_DIRECT);
24220 reg =
24221 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24223 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24224 NULL, 1, OPTAB_DIRECT);
24225 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24226 if (mode == SImode)
24227 return reg;
24228 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24229 NULL, 1, OPTAB_DIRECT);
24230 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24231 return reg;
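/* Illustrative sketch (not part of the original code): the shift/or sequence
   emitted above broadcasts a byte across a word exactly as the helper below
   does for the 64-bit case; the multiply-based variant instead computes
   value * 0x0101010101010101.  */
static inline unsigned long long
sketch_broadcast_byte (unsigned char value)
{
  unsigned long long v = value;

  v |= v << 8;          /* 0x00XY           -> 0xXYXY */
  v |= v << 16;         /* 0xXYXY           -> 0xXYXYXYXY */
  v |= v << 32;         /* 0xXYXYXYXY       -> 0xXYXYXYXYXYXYXYXY */
  return v;
}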
24235 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24236 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24237 raising alignment from ALIGN to DESIRED_ALIGN. */
24238 static rtx
24239 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24240 int align)
24242 rtx promoted_val;
24244 if (TARGET_64BIT
24245 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24246 promoted_val = promote_duplicated_reg (DImode, val);
24247 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24248 promoted_val = promote_duplicated_reg (SImode, val);
24249 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24250 promoted_val = promote_duplicated_reg (HImode, val);
24251 else
24252 promoted_val = val;
24254 return promoted_val;
24257 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24258 operations when profitable. The code depends upon architecture, block size
24259 and alignment, but always has one of the following overall structures:
24261 Aligned move sequence:
24263 1) Prologue guard: Conditional that jumps up to epilogues for small
24264 blocks that can be handled by epilogue alone. This is faster
24265 but also needed for correctness, since the prologue assumes the block
24266 is larger than the desired alignment.
24268 Optional dynamic check for size and libcall for large
24269 blocks is emitted here too, with -minline-stringops-dynamically.
24271 2) Prologue: copy first few bytes in order to get destination
24272 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24273 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24274 copied. We emit either a jump tree on power of two sized
24275 blocks, or a byte loop.
24277 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24278 with specified algorithm.
24280 4) Epilogue: code copying tail of the block that is too small to be
24281 handled by main body (or up to size guarded by prologue guard).
24283 Misaligned move sequence
24285 1) misaligned move prologue/epilogue containing:
24286 a) Prologue handling small memory blocks and jumping to done_label
24287 (skipped if blocks are known to be large enough)
24288 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes, if alignment
24289 is needed, by a single possibly misaligned move
24290 (skipped if alignment is not needed)
24291 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24293 2) Zero size guard dispatching to done_label, if needed
24295 3) dispatch to library call, if needed,
24297 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24298 with specified algorithm. */
24299 bool
24300 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24301 rtx align_exp, rtx expected_align_exp,
24302 rtx expected_size_exp, rtx min_size_exp,
24303 rtx max_size_exp, rtx probable_max_size_exp,
24304 bool issetmem)
24306 rtx destreg;
24307 rtx srcreg = NULL;
24308 rtx label = NULL;
24309 rtx tmp;
24310 rtx jump_around_label = NULL;
24311 HOST_WIDE_INT align = 1;
24312 unsigned HOST_WIDE_INT count = 0;
24313 HOST_WIDE_INT expected_size = -1;
24314 int size_needed = 0, epilogue_size_needed;
24315 int desired_align = 0, align_bytes = 0;
24316 enum stringop_alg alg;
24317 rtx promoted_val = NULL;
24318 rtx vec_promoted_val = NULL;
24319 bool force_loopy_epilogue = false;
24320 int dynamic_check;
24321 bool need_zero_guard = false;
24322 bool noalign;
24323 enum machine_mode move_mode = VOIDmode;
24324 int unroll_factor = 1;
24325 /* TODO: Once value ranges are available, fill in proper data. */
24326 unsigned HOST_WIDE_INT min_size = 0;
24327 unsigned HOST_WIDE_INT max_size = -1;
24328 unsigned HOST_WIDE_INT probable_max_size = -1;
24329 bool misaligned_prologue_used = false;
24331 if (CONST_INT_P (align_exp))
24332 align = INTVAL (align_exp);
24333 /* i386 can do misaligned access at reasonably increased cost. */
24334 if (CONST_INT_P (expected_align_exp)
24335 && INTVAL (expected_align_exp) > align)
24336 align = INTVAL (expected_align_exp);
24337 /* ALIGN is the minimum of destination and source alignment, but we care here
24338 just about destination alignment. */
24339 else if (!issetmem
24340 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24341 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24343 if (CONST_INT_P (count_exp))
24345 min_size = max_size = probable_max_size = count = expected_size
24346 = INTVAL (count_exp);
24347 /* When COUNT is 0, there is nothing to do. */
24348 if (!count)
24349 return true;
24351 else
24353 if (min_size_exp)
24354 min_size = INTVAL (min_size_exp);
24355 if (max_size_exp)
24356 max_size = INTVAL (max_size_exp);
24357 if (probable_max_size_exp)
24358 probable_max_size = INTVAL (probable_max_size_exp);
24359 if (CONST_INT_P (expected_size_exp))
24360 expected_size = INTVAL (expected_size_exp);
24363 /* Make sure we don't need to care about overflow later on. */
24364 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24365 return false;
24367 /* Step 0: Decide on preferred algorithm, desired alignment and
24368 size of chunks to be copied by main loop. */
24369 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24370 issetmem,
24371 issetmem && val_exp == const0_rtx,
24372 &dynamic_check, &noalign);
24373 if (alg == libcall)
24374 return false;
24375 gcc_assert (alg != no_stringop);
24377 /* For now the vector version of memset is generated only for memory zeroing, as
24378 creating the promoted vector value is very cheap in this case. */
24379 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24380 alg = unrolled_loop;
24382 if (!count)
24383 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24384 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24385 if (!issetmem)
24386 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24388 unroll_factor = 1;
24389 move_mode = word_mode;
24390 switch (alg)
24392 case libcall:
24393 case no_stringop:
24394 case last_alg:
24395 gcc_unreachable ();
24396 case loop_1_byte:
24397 need_zero_guard = true;
24398 move_mode = QImode;
24399 break;
24400 case loop:
24401 need_zero_guard = true;
24402 break;
24403 case unrolled_loop:
24404 need_zero_guard = true;
24405 unroll_factor = (TARGET_64BIT ? 4 : 2);
24406 break;
24407 case vector_loop:
24408 need_zero_guard = true;
24409 unroll_factor = 4;
24410 /* Find the widest supported mode. */
24411 move_mode = word_mode;
24412 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24413 != CODE_FOR_nothing)
24414 move_mode = GET_MODE_WIDER_MODE (move_mode);
24416 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24417 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24418 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24420 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24421 move_mode = mode_for_vector (word_mode, nunits);
24422 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24423 move_mode = word_mode;
24425 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24426 break;
24427 case rep_prefix_8_byte:
24428 move_mode = DImode;
24429 break;
24430 case rep_prefix_4_byte:
24431 move_mode = SImode;
24432 break;
24433 case rep_prefix_1_byte:
24434 move_mode = QImode;
24435 break;
24437 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24438 epilogue_size_needed = size_needed;
24440 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24441 if (!TARGET_ALIGN_STRINGOPS || noalign)
24442 align = desired_align;
24444 /* Step 1: Prologue guard. */
24446 /* Alignment code needs count to be in register. */
24447 if (CONST_INT_P (count_exp) && desired_align > align)
24449 if (INTVAL (count_exp) > desired_align
24450 && INTVAL (count_exp) > size_needed)
24452 align_bytes
24453 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24454 if (align_bytes <= 0)
24455 align_bytes = 0;
24456 else
24457 align_bytes = desired_align - align_bytes;
24459 if (align_bytes == 0)
24460 count_exp = force_reg (counter_mode (count_exp), count_exp);
24462 gcc_assert (desired_align >= 1 && align >= 1);
24464 /* Misaligned move sequences handle both prologue and epilogue at once.
24465 Default code generation results in smaller code for large alignments
24466 and also avoids redundant work when sizes are known precisely. */
24467 misaligned_prologue_used
24468 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24469 && MAX (desired_align, epilogue_size_needed) <= 32
24470 && desired_align <= epilogue_size_needed
24471 && ((desired_align > align && !align_bytes)
24472 || (!count && epilogue_size_needed > 1)));
24474 /* Do the cheap promotion to allow better CSE across the
24475 main loop and epilogue (i.e. one load of the big constant in
24476 front of all the code).
24477 For now the misaligned move sequences do not have a fast path
24478 without broadcasting. */
24479 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24481 if (alg == vector_loop)
24483 gcc_assert (val_exp == const0_rtx);
24484 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24485 promoted_val = promote_duplicated_reg_to_size (val_exp,
24486 GET_MODE_SIZE (word_mode),
24487 desired_align, align);
24489 else
24491 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24492 desired_align, align);
24495 /* Misaligned move sequences handle both prologues and epilogues at once.
24496 The default code generation results in smaller code for large alignments
24497 and also avoids redundant work when sizes are known precisely. */
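/* For illustration (added example): a 24-byte block with 16-byte moves can be
   covered by two possibly overlapping misaligned moves, one starting at the
   first byte and one ending at the last byte, so no separate alignment loop
   or byte tail is required.  */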
24498 if (misaligned_prologue_used)
24500 /* The misaligned move prologue handles small blocks by itself. */
24501 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24502 (dst, src, &destreg, &srcreg,
24503 move_mode, promoted_val, vec_promoted_val,
24504 &count_exp,
24505 &jump_around_label,
24506 desired_align < align
24507 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24508 desired_align, align, &min_size, dynamic_check, issetmem);
24509 if (!issetmem)
24510 src = change_address (src, BLKmode, srcreg);
24511 dst = change_address (dst, BLKmode, destreg);
24512 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24513 epilogue_size_needed = 0;
24514 if (need_zero_guard && !min_size)
24516 /* It is possible that we copied enough so the main loop will not
24517 execute. */
24518 gcc_assert (size_needed > 1);
24519 if (jump_around_label == NULL_RTX)
24520 jump_around_label = gen_label_rtx ();
24521 emit_cmp_and_jump_insns (count_exp,
24522 GEN_INT (size_needed),
24523 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24524 if (expected_size == -1
24525 || expected_size < (desired_align - align) / 2 + size_needed)
24526 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24527 else
24528 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24531 /* Ensure that alignment prologue won't copy past end of block. */
24532 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24534 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24535 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24536 Make sure it is power of 2. */
24537 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
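/* For illustration: the statement above yields the smallest power of two
   strictly greater than the current value, e.g. 15 -> 16 and 17 -> 32.  */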
24539 /* To improve performance for small blocks, we jump around the VAL
24540 promoting code. This means that if the promoted VAL is not constant,
24541 we might not use it in the epilogue and have to use the byte
24542 loop variant. */
24543 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24544 force_loopy_epilogue = true;
24545 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24546 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24548 /* If main algorithm works on QImode, no epilogue is needed.
24549 For small sizes just don't align anything. */
24550 if (size_needed == 1)
24551 desired_align = align;
24552 else
24553 goto epilogue;
24555 else if (!count
24556 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24558 label = gen_label_rtx ();
24559 emit_cmp_and_jump_insns (count_exp,
24560 GEN_INT (epilogue_size_needed),
24561 LTU, 0, counter_mode (count_exp), 1, label);
24562 if (expected_size == -1 || expected_size < epilogue_size_needed)
24563 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24564 else
24565 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24569 /* Emit code to decide at runtime whether a library call or inline code
24570 should be used. */
24571 if (dynamic_check != -1)
24573 if (!issetmem && CONST_INT_P (count_exp))
24575 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24577 emit_block_move_via_libcall (dst, src, count_exp, false);
24578 count_exp = const0_rtx;
24579 goto epilogue;
24582 else
24584 rtx hot_label = gen_label_rtx ();
24585 if (jump_around_label == NULL_RTX)
24586 jump_around_label = gen_label_rtx ();
24587 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24588 LEU, 0, counter_mode (count_exp),
24589 1, hot_label);
24590 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24591 if (issetmem)
24592 set_storage_via_libcall (dst, count_exp, val_exp, false);
24593 else
24594 emit_block_move_via_libcall (dst, src, count_exp, false);
24595 emit_jump (jump_around_label);
24596 emit_label (hot_label);
24600 /* Step 2: Alignment prologue. */
24601 /* Do the expensive promotion once we branched off the small blocks. */
24602 if (issetmem && !promoted_val)
24603 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24604 desired_align, align);
24606 if (desired_align > align && !misaligned_prologue_used)
24608 if (align_bytes == 0)
24610 /* Except for the first move in the prologue, we no longer know
24611 the constant offset in aliasing info. It does not seem worth
24612 the pain to maintain it for the first move, so throw away
24613 the info early. */
24614 dst = change_address (dst, BLKmode, destreg);
24615 if (!issetmem)
24616 src = change_address (src, BLKmode, srcreg);
24617 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24618 promoted_val, vec_promoted_val,
24619 count_exp, align, desired_align,
24620 issetmem);
24621 /* At most desired_align - align bytes are copied. */
24622 if (min_size < (unsigned)(desired_align - align))
24623 min_size = 0;
24624 else
24625 min_size -= desired_align - align;
24627 else
24629 /* If we know how many bytes need to be stored before dst is
24630 sufficiently aligned, maintain aliasing info accurately. */
24631 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24632 srcreg,
24633 promoted_val,
24634 vec_promoted_val,
24635 desired_align,
24636 align_bytes,
24637 issetmem);
24639 count_exp = plus_constant (counter_mode (count_exp),
24640 count_exp, -align_bytes);
24641 count -= align_bytes;
24642 min_size -= align_bytes;
24643 max_size -= align_bytes;
24645 if (need_zero_guard
24646 && !min_size
24647 && (count < (unsigned HOST_WIDE_INT) size_needed
24648 || (align_bytes == 0
24649 && count < ((unsigned HOST_WIDE_INT) size_needed
24650 + desired_align - align))))
24652 /* It is possible that we copied enough so the main loop will not
24653 execute. */
24654 gcc_assert (size_needed > 1);
24655 if (label == NULL_RTX)
24656 label = gen_label_rtx ();
24657 emit_cmp_and_jump_insns (count_exp,
24658 GEN_INT (size_needed),
24659 LTU, 0, counter_mode (count_exp), 1, label);
24660 if (expected_size == -1
24661 || expected_size < (desired_align - align) / 2 + size_needed)
24662 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24663 else
24664 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24667 if (label && size_needed == 1)
24669 emit_label (label);
24670 LABEL_NUSES (label) = 1;
24671 label = NULL;
24672 epilogue_size_needed = 1;
24673 if (issetmem)
24674 promoted_val = val_exp;
24676 else if (label == NULL_RTX && !misaligned_prologue_used)
24677 epilogue_size_needed = size_needed;
24679 /* Step 3: Main loop. */
24681 switch (alg)
24683 case libcall:
24684 case no_stringop:
24685 case last_alg:
24686 gcc_unreachable ();
24687 case loop_1_byte:
24688 case loop:
24689 case unrolled_loop:
24690 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24691 count_exp, move_mode, unroll_factor,
24692 expected_size, issetmem);
24693 break;
24694 case vector_loop:
24695 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24696 vec_promoted_val, count_exp, move_mode,
24697 unroll_factor, expected_size, issetmem);
24698 break;
24699 case rep_prefix_8_byte:
24700 case rep_prefix_4_byte:
24701 case rep_prefix_1_byte:
24702 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24703 val_exp, count_exp, move_mode, issetmem);
24704 break;
24706 /* Properly adjust the offsets of the src and dest memory for aliasing. */
24707 if (CONST_INT_P (count_exp))
24709 if (!issetmem)
24710 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24711 (count / size_needed) * size_needed);
24712 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24713 (count / size_needed) * size_needed);
24715 else
24717 if (!issetmem)
24718 src = change_address (src, BLKmode, srcreg);
24719 dst = change_address (dst, BLKmode, destreg);
24722 /* Step 4: Epilogue to copy the remaining bytes. */
24723 epilogue:
24724 if (label)
24726 /* When the main loop is done, COUNT_EXP might hold the original count,
24727 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
24728 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
24729 bytes. Compensate if needed. */
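/* Example (for illustration): with size_needed == 8 and epilogue_size_needed
   == 16, count_exp is masked with 7 below, so the epilogue only sees the
   bytes the main loop actually left over.  */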
24731 if (size_needed < epilogue_size_needed)
24733 tmp =
24734 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24735 GEN_INT (size_needed - 1), count_exp, 1,
24736 OPTAB_DIRECT);
24737 if (tmp != count_exp)
24738 emit_move_insn (count_exp, tmp);
24740 emit_label (label);
24741 LABEL_NUSES (label) = 1;
24744 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24746 if (force_loopy_epilogue)
24747 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24748 epilogue_size_needed);
24749 else
24751 if (issetmem)
24752 expand_setmem_epilogue (dst, destreg, promoted_val,
24753 vec_promoted_val, count_exp,
24754 epilogue_size_needed);
24755 else
24756 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24757 epilogue_size_needed);
24760 if (jump_around_label)
24761 emit_label (jump_around_label);
24762 return true;
24766 /* Expand the appropriate insns for doing strlen if not just doing
24767 repnz; scasb
24769 out = result, initialized with the start address
24770 align_rtx = alignment of the address.
24771 scratch = scratch register, initialized with the start address when
24772 not aligned, otherwise undefined
24774 This is just the body. It needs the initializations mentioned above and
24775 some address computing at the end. These things are done in i386.md. */
24777 static void
24778 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24780 int align;
24781 rtx tmp;
24782 rtx align_2_label = NULL_RTX;
24783 rtx align_3_label = NULL_RTX;
24784 rtx align_4_label = gen_label_rtx ();
24785 rtx end_0_label = gen_label_rtx ();
24786 rtx mem;
24787 rtx tmpreg = gen_reg_rtx (SImode);
24788 rtx scratch = gen_reg_rtx (SImode);
24789 rtx cmp;
24791 align = 0;
24792 if (CONST_INT_P (align_rtx))
24793 align = INTVAL (align_rtx);
24795 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24797 /* Is there a known alignment and is it less than 4? */
24798 if (align < 4)
24800 rtx scratch1 = gen_reg_rtx (Pmode);
24801 emit_move_insn (scratch1, out);
24802 /* Is there a known alignment and is it not 2? */
24803 if (align != 2)
24805 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24806 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24808 /* Leave just the 3 lower bits. */
24809 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24810 NULL_RTX, 0, OPTAB_WIDEN);
24812 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24813 Pmode, 1, align_4_label);
24814 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24815 Pmode, 1, align_2_label);
24816 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24817 Pmode, 1, align_3_label);
24819 else
24821 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24822 check whether it is aligned to a 4-byte boundary. */
24824 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24825 NULL_RTX, 0, OPTAB_WIDEN);
24827 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24828 Pmode, 1, align_4_label);
24831 mem = change_address (src, QImode, out);
24833 /* Now compare the bytes. */
24835 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
24836 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24837 QImode, 1, end_0_label);
24839 /* Increment the address. */
24840 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24842 /* Not needed with an alignment of 2 */
24843 if (align != 2)
24845 emit_label (align_2_label);
24847 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24848 end_0_label);
24850 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24852 emit_label (align_3_label);
24855 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24856 end_0_label);
24858 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24861 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24862 align this loop: it only makes the program bigger and does not help
24863 to speed it up. */
24864 emit_label (align_4_label);
24866 mem = change_address (src, SImode, out);
24867 emit_move_insn (scratch, mem);
24868 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24870 /* This formula yields a nonzero result iff one of the bytes is zero.
24871 This saves three branches inside the loop and many cycles. */
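/* Worked example (for illustration): for scratch == 0x12345600 the sequence
   below computes (0x12345600 - 0x01010101) & ~0x12345600 & 0x80808080
   == 0x113354ff & 0xedcba9ff & 0x80808080 == 0x00000080, flagging the zero
   low byte; a word with no zero byte yields 0.  */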
24873 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24874 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24875 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24876 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24877 gen_int_mode (0x80808080, SImode)));
24878 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24879 align_4_label);
24881 if (TARGET_CMOVE)
24883 rtx reg = gen_reg_rtx (SImode);
24884 rtx reg2 = gen_reg_rtx (Pmode);
24885 emit_move_insn (reg, tmpreg);
24886 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24888 /* If zero is not in the first two bytes, move two bytes forward. */
24889 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24890 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24891 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24892 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24893 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24894 reg,
24895 tmpreg)));
24896 /* Emit lea manually to avoid clobbering of flags. */
24897 emit_insn (gen_rtx_SET (SImode, reg2,
24898 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24900 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24901 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24902 emit_insn (gen_rtx_SET (VOIDmode, out,
24903 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24904 reg2,
24905 out)));
24907 else
24909 rtx end_2_label = gen_label_rtx ();
24910 /* Is zero in the first two bytes? */
24912 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24913 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24914 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24915 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24916 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24917 pc_rtx);
24918 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24919 JUMP_LABEL (tmp) = end_2_label;
24921 /* Not in the first two. Move two bytes forward. */
24922 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24923 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24925 emit_label (end_2_label);
24929 /* Avoid branch in fixing the byte. */
24930 tmpreg = gen_lowpart (QImode, tmpreg);
24931 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24932 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24933 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24934 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24936 emit_label (end_0_label);
24939 /* Expand strlen. */
24941 bool
24942 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24944 rtx addr, scratch1, scratch2, scratch3, scratch4;
24946 /* The generic case of the strlen expander is long. Avoid its
24947 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
24949 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24950 && !TARGET_INLINE_ALL_STRINGOPS
24951 && !optimize_insn_for_size_p ()
24952 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24953 return false;
24955 addr = force_reg (Pmode, XEXP (src, 0));
24956 scratch1 = gen_reg_rtx (Pmode);
24958 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24959 && !optimize_insn_for_size_p ())
24961 /* Well, it seems that some optimizer does not combine a call like
24962 foo(strlen(bar), strlen(bar));
24963 when the move and the subtraction are done here. It does calculate
24964 the length just once when these instructions are done inside of
24965 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
24966 often used and I use one fewer register for the lifetime of
24967 output_strlen_unroll(), this is better. */
24969 emit_move_insn (out, addr);
24971 ix86_expand_strlensi_unroll_1 (out, src, align);
24973 /* strlensi_unroll_1 returns the address of the zero at the end of
24974 the string, like memchr(), so compute the length by subtracting
24975 the start address. */
24976 emit_insn (ix86_gen_sub3 (out, out, addr));
24978 else
24980 rtx unspec;
24982 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24983 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24984 return false;
24986 scratch2 = gen_reg_rtx (Pmode);
24987 scratch3 = gen_reg_rtx (Pmode);
24988 scratch4 = force_reg (Pmode, constm1_rtx);
24990 emit_move_insn (scratch3, addr);
24991 eoschar = force_reg (QImode, eoschar);
24993 src = replace_equiv_address_nv (src, scratch3);
24995 /* If .md starts supporting :P, this can be done in .md. */
24996 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24997 scratch4), UNSPEC_SCAS);
24998 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24999 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25000 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25002 return true;
25005 /* For a given symbol (function), construct code to compute the address of its
25006 PLT entry in the large x86-64 PIC model. */
25007 static rtx
25008 construct_plt_address (rtx symbol)
25010 rtx tmp, unspec;
25012 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25013 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25014 gcc_assert (Pmode == DImode);
25016 tmp = gen_reg_rtx (Pmode);
25017 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25019 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25020 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25021 return tmp;
25025 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25026 rtx callarg2,
25027 rtx pop, bool sibcall)
25029 unsigned int const cregs_size
25030 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25031 rtx vec[3 + cregs_size];
25032 rtx use = NULL, call;
25033 unsigned int vec_len = 0;
25035 if (pop == const0_rtx)
25036 pop = NULL;
25037 gcc_assert (!TARGET_64BIT || !pop);
25039 if (TARGET_MACHO && !TARGET_64BIT)
25041 #if TARGET_MACHO
25042 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25043 fnaddr = machopic_indirect_call_target (fnaddr);
25044 #endif
25046 else
25048 /* Static functions and indirect calls don't need the pic register. */
25049 if (flag_pic
25050 && (!TARGET_64BIT
25051 || (ix86_cmodel == CM_LARGE_PIC
25052 && DEFAULT_ABI != MS_ABI))
25053 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25054 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25055 use_reg (&use, pic_offset_table_rtx);
25058 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
25060 rtx al = gen_rtx_REG (QImode, AX_REG);
25061 emit_move_insn (al, callarg2);
25062 use_reg (&use, al);
25065 if (ix86_cmodel == CM_LARGE_PIC
25066 && !TARGET_PECOFF
25067 && MEM_P (fnaddr)
25068 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25069 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25070 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25071 else if (sibcall
25072 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25073 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25075 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25076 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25079 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25080 if (retval)
25081 call = gen_rtx_SET (VOIDmode, retval, call);
25082 vec[vec_len++] = call;
25084 if (pop)
25086 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25087 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25088 vec[vec_len++] = pop;
25091 if (TARGET_64BIT_MS_ABI
25092 && (!callarg2 || INTVAL (callarg2) != -2))
25094 unsigned i;
25096 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
25097 UNSPEC_MS_TO_SYSV_CALL);
25099 for (i = 0; i < cregs_size; i++)
25101 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25102 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25104 vec[vec_len++]
25105 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
25109 if (vec_len > 1)
25110 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25111 call = emit_call_insn (call);
25112 if (use)
25113 CALL_INSN_FUNCTION_USAGE (call) = use;
25115 return call;
25118 /* Output the assembly for a call instruction. */
25120 const char *
25121 ix86_output_call_insn (rtx insn, rtx call_op)
25123 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25124 bool seh_nop_p = false;
25125 const char *xasm;
25127 if (SIBLING_CALL_P (insn))
25129 if (direct_p)
25130 xasm = "jmp\t%P0";
25131 /* SEH epilogue detection requires the indirect branch case
25132 to include REX.W. */
25133 else if (TARGET_SEH)
25134 xasm = "rex.W jmp %A0";
25135 else
25136 xasm = "jmp\t%A0";
25138 output_asm_insn (xasm, &call_op);
25139 return "";
25142 /* SEH unwinding can require an extra nop to be emitted in several
25143 circumstances. Determine if we have one of those. */
25144 if (TARGET_SEH)
25146 rtx i;
25148 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25150 /* If we get to another real insn, we don't need the nop. */
25151 if (INSN_P (i))
25152 break;
25154 /* If we get to the epilogue note, prevent a catch region from
25155 being adjacent to the standard epilogue sequence. If non-
25156 call-exceptions, we'll have done this during epilogue emission. */
25157 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25158 && !flag_non_call_exceptions
25159 && !can_throw_internal (insn))
25161 seh_nop_p = true;
25162 break;
25166 /* If we didn't find a real insn following the call, prevent the
25167 unwinder from looking into the next function. */
25168 if (i == NULL)
25169 seh_nop_p = true;
25172 if (direct_p)
25173 xasm = "call\t%P0";
25174 else
25175 xasm = "call\t%A0";
25177 output_asm_insn (xasm, &call_op);
25179 if (seh_nop_p)
25180 return "nop";
25182 return "";
25185 /* Clear stack slot assignments remembered from previous functions.
25186 This is called from INIT_EXPANDERS once before RTL is emitted for each
25187 function. */
25189 static struct machine_function *
25190 ix86_init_machine_status (void)
25192 struct machine_function *f;
25194 f = ggc_cleared_alloc<machine_function> ();
25195 f->use_fast_prologue_epilogue_nregs = -1;
25196 f->call_abi = ix86_abi;
25198 return f;
25201 /* Return a MEM corresponding to a stack slot with mode MODE.
25202 Allocate a new slot if necessary.
25204 The RTL for a function can have several slots available: N is
25205 which slot to use. */
25208 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25210 struct stack_local_entry *s;
25212 gcc_assert (n < MAX_386_STACK_LOCALS);
25214 for (s = ix86_stack_locals; s; s = s->next)
25215 if (s->mode == mode && s->n == n)
25216 return validize_mem (copy_rtx (s->rtl));
25218 s = ggc_alloc<stack_local_entry> ();
25219 s->n = n;
25220 s->mode = mode;
25221 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25223 s->next = ix86_stack_locals;
25224 ix86_stack_locals = s;
25225 return validize_mem (s->rtl);
25228 static void
25229 ix86_instantiate_decls (void)
25231 struct stack_local_entry *s;
25233 for (s = ix86_stack_locals; s; s = s->next)
25234 if (s->rtl != NULL_RTX)
25235 instantiate_decl_rtl (s->rtl);
25238 /* Check whether x86 address PARTS is a pc-relative address. */
25240 static bool
25241 rip_relative_addr_p (struct ix86_address *parts)
25243 rtx base, index, disp;
25245 base = parts->base;
25246 index = parts->index;
25247 disp = parts->disp;
25249 if (disp && !base && !index)
25251 if (TARGET_64BIT)
25253 rtx symbol = disp;
25255 if (GET_CODE (disp) == CONST)
25256 symbol = XEXP (disp, 0);
25257 if (GET_CODE (symbol) == PLUS
25258 && CONST_INT_P (XEXP (symbol, 1)))
25259 symbol = XEXP (symbol, 0);
25261 if (GET_CODE (symbol) == LABEL_REF
25262 || (GET_CODE (symbol) == SYMBOL_REF
25263 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25264 || (GET_CODE (symbol) == UNSPEC
25265 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25266 || XINT (symbol, 1) == UNSPEC_PCREL
25267 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25268 return true;
25271 return false;
25274 /* Calculate the length of the memory address in the instruction encoding.
25275 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25276 or other prefixes. We never generate addr32 prefix for LEA insn. */
25279 memory_address_length (rtx addr, bool lea)
25281 struct ix86_address parts;
25282 rtx base, index, disp;
25283 int len;
25284 int ok;
25286 if (GET_CODE (addr) == PRE_DEC
25287 || GET_CODE (addr) == POST_INC
25288 || GET_CODE (addr) == PRE_MODIFY
25289 || GET_CODE (addr) == POST_MODIFY)
25290 return 0;
25292 ok = ix86_decompose_address (addr, &parts);
25293 gcc_assert (ok);
25295 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25297 /* If this is not LEA instruction, add the length of addr32 prefix. */
25298 if (TARGET_64BIT && !lea
25299 && (SImode_address_operand (addr, VOIDmode)
25300 || (parts.base && GET_MODE (parts.base) == SImode)
25301 || (parts.index && GET_MODE (parts.index) == SImode)))
25302 len++;
25304 base = parts.base;
25305 index = parts.index;
25306 disp = parts.disp;
25308 if (base && GET_CODE (base) == SUBREG)
25309 base = SUBREG_REG (base);
25310 if (index && GET_CODE (index) == SUBREG)
25311 index = SUBREG_REG (index);
25313 gcc_assert (base == NULL_RTX || REG_P (base));
25314 gcc_assert (index == NULL_RTX || REG_P (index));
25316 /* Rule of thumb:
25317 - esp as the base always wants an index,
25318 - ebp as the base always wants a displacement,
25319 - r12 as the base always wants an index,
25320 - r13 as the base always wants a displacement. */
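/* For illustration: (%esp) cannot be encoded with a plain one-byte modrm and
   needs a SIB byte, while mod=00 r/m=101 does not mean (%ebp) (it means
   disp32, or disp32(%rip) in 64-bit mode), so (%ebp) is emitted as mod=01
   with a zero disp8 -- hence the extra byte counted below.  */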
25322 /* Register Indirect. */
25323 if (base && !index && !disp)
25325 /* esp (for its index) and ebp (for its displacement) need
25326 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25327 code. */
25328 if (base == arg_pointer_rtx
25329 || base == frame_pointer_rtx
25330 || REGNO (base) == SP_REG
25331 || REGNO (base) == BP_REG
25332 || REGNO (base) == R12_REG
25333 || REGNO (base) == R13_REG)
25334 len++;
25337 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25338 is not disp32, but disp32(%rip), so for disp32 a
25339 SIB byte is needed, unless print_operand_address
25340 optimizes it into disp32(%rip) or (%rip) is implied
25341 by the UNSPEC. */
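/* For illustration: in 64-bit mode an absolute movl foo, %eax is encoded as
   modrm + SIB + disp32, one byte longer than the RIP-relative
   movl foo(%rip), %eax, which needs no SIB byte.  */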
25342 else if (disp && !base && !index)
25344 len += 4;
25345 if (rip_relative_addr_p (&parts))
25346 len++;
25348 else
25350 /* Find the length of the displacement constant. */
25351 if (disp)
25353 if (base && satisfies_constraint_K (disp))
25354 len += 1;
25355 else
25356 len += 4;
25358 /* ebp always wants a displacement. Similarly r13. */
25359 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25360 len++;
25362 /* An index requires the two-byte modrm form.... */
25363 if (index
25364 /* ...like esp (or r12), which always wants an index. */
25365 || base == arg_pointer_rtx
25366 || base == frame_pointer_rtx
25367 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25368 len++;
25371 return len;
25374 /* Compute the default value for the "length_immediate" attribute. When
25375 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
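/* Example (for illustration): addl $100, %eax fits the sign-extended 8-bit
   immediate alternative and counts as 1 byte, while addl $1000, %eax needs
   the full 32-bit immediate and counts as 4 bytes.  */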
25377 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25379 int len = 0;
25380 int i;
25381 extract_insn_cached (insn);
25382 for (i = recog_data.n_operands - 1; i >= 0; --i)
25383 if (CONSTANT_P (recog_data.operand[i]))
25385 enum attr_mode mode = get_attr_mode (insn);
25387 gcc_assert (!len);
25388 if (shortform && CONST_INT_P (recog_data.operand[i]))
25390 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25391 switch (mode)
25393 case MODE_QI:
25394 len = 1;
25395 continue;
25396 case MODE_HI:
25397 ival = trunc_int_for_mode (ival, HImode);
25398 break;
25399 case MODE_SI:
25400 ival = trunc_int_for_mode (ival, SImode);
25401 break;
25402 default:
25403 break;
25405 if (IN_RANGE (ival, -128, 127))
25407 len = 1;
25408 continue;
25411 switch (mode)
25413 case MODE_QI:
25414 len = 1;
25415 break;
25416 case MODE_HI:
25417 len = 2;
25418 break;
25419 case MODE_SI:
25420 len = 4;
25421 break;
25422 /* Immediates for DImode instructions are encoded
25423 as 32-bit sign-extended values. */
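/* (For illustration: a constant outside the signed 32-bit range cannot be
   used as an immediate of such an instruction at all; it has to be loaded
   into a register first, e.g. via movabs.)  */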
25424 case MODE_DI:
25425 len = 4;
25426 break;
25427 default:
25428 fatal_insn ("unknown insn mode", insn);
25431 return len;
25434 /* Compute default value for "length_address" attribute. */
25436 ix86_attr_length_address_default (rtx insn)
25438 int i;
25440 if (get_attr_type (insn) == TYPE_LEA)
25442 rtx set = PATTERN (insn), addr;
25444 if (GET_CODE (set) == PARALLEL)
25445 set = XVECEXP (set, 0, 0);
25447 gcc_assert (GET_CODE (set) == SET);
25449 addr = SET_SRC (set);
25451 return memory_address_length (addr, true);
25454 extract_insn_cached (insn);
25455 for (i = recog_data.n_operands - 1; i >= 0; --i)
25456 if (MEM_P (recog_data.operand[i]))
25458 constrain_operands_cached (reload_completed);
25459 if (which_alternative != -1)
25461 const char *constraints = recog_data.constraints[i];
25462 int alt = which_alternative;
25464 while (*constraints == '=' || *constraints == '+')
25465 constraints++;
25466 while (alt-- > 0)
25467 while (*constraints++ != ',')
25469 /* Skip ignored operands. */
25470 if (*constraints == 'X')
25471 continue;
25473 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25475 return 0;
25478 /* Compute the default value for the "length_vex" attribute. It includes
25479 the 2- or 3-byte VEX prefix and 1 opcode byte. */
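/* For illustration: vaddps %xmm2, %xmm1, %xmm0 fits the 2-byte (C5) prefix,
   while REX.W, an extended base/index register, or an opcode map other than
   0f forces the 3-byte (C4) form -- which is what the checks below
   approximate.  */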
25482 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25484 int i;
25486 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W bit
25487 requires the 3-byte VEX prefix. */
25488 if (!has_0f_opcode || has_vex_w)
25489 return 3 + 1;
25491 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25492 if (!TARGET_64BIT)
25493 return 2 + 1;
25495 extract_insn_cached (insn);
25497 for (i = recog_data.n_operands - 1; i >= 0; --i)
25498 if (REG_P (recog_data.operand[i]))
25500 /* REX.W bit uses 3 byte VEX prefix. */
25501 if (GET_MODE (recog_data.operand[i]) == DImode
25502 && GENERAL_REG_P (recog_data.operand[i]))
25503 return 3 + 1;
25505 else
25507 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25508 if (MEM_P (recog_data.operand[i])
25509 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25510 return 3 + 1;
25513 return 2 + 1;
25516 /* Return the maximum number of instructions a cpu can issue. */
25518 static int
25519 ix86_issue_rate (void)
25521 switch (ix86_tune)
25523 case PROCESSOR_PENTIUM:
25524 case PROCESSOR_BONNELL:
25525 case PROCESSOR_SILVERMONT:
25526 case PROCESSOR_INTEL:
25527 case PROCESSOR_K6:
25528 case PROCESSOR_BTVER2:
25529 case PROCESSOR_PENTIUM4:
25530 case PROCESSOR_NOCONA:
25531 return 2;
25533 case PROCESSOR_PENTIUMPRO:
25534 case PROCESSOR_ATHLON:
25535 case PROCESSOR_K8:
25536 case PROCESSOR_AMDFAM10:
25537 case PROCESSOR_GENERIC:
25538 case PROCESSOR_BTVER1:
25539 return 3;
25541 case PROCESSOR_BDVER1:
25542 case PROCESSOR_BDVER2:
25543 case PROCESSOR_BDVER3:
25544 case PROCESSOR_BDVER4:
25545 case PROCESSOR_CORE2:
25546 case PROCESSOR_NEHALEM:
25547 case PROCESSOR_SANDYBRIDGE:
25548 case PROCESSOR_HASWELL:
25549 return 4;
25551 default:
25552 return 1;
25556 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
25557 set by DEP_INSN and nothing else set by DEP_INSN. */
25559 static bool
25560 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25562 rtx set, set2;
25564 /* Simplify the test for uninteresting insns. */
25565 if (insn_type != TYPE_SETCC
25566 && insn_type != TYPE_ICMOV
25567 && insn_type != TYPE_FCMOV
25568 && insn_type != TYPE_IBR)
25569 return false;
25571 if ((set = single_set (dep_insn)) != 0)
25573 set = SET_DEST (set);
25574 set2 = NULL_RTX;
25576 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25577 && XVECLEN (PATTERN (dep_insn), 0) == 2
25578 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25579 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25581 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25582 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25584 else
25585 return false;
25587 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25588 return false;
25590 /* This test is true if the dependent insn reads the flags but
25591 not any other potentially set register. */
25592 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25593 return false;
25595 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25596 return false;
25598 return true;
25601 /* Return true iff USE_INSN has a memory address with operands set by
25602 SET_INSN. */
25604 bool
25605 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25607 int i;
25608 extract_insn_cached (use_insn);
25609 for (i = recog_data.n_operands - 1; i >= 0; --i)
25610 if (MEM_P (recog_data.operand[i]))
25612 rtx addr = XEXP (recog_data.operand[i], 0);
25613 return modified_in_p (addr, set_insn) != 0;
25615 return false;
25618 /* Helper function for exact_store_load_dependency.
25619 Return true if addr is found in insn. */
25620 static bool
25621 exact_dependency_1 (rtx addr, rtx insn)
25623 enum rtx_code code;
25624 const char *format_ptr;
25625 int i, j;
25627 code = GET_CODE (insn);
25628 switch (code)
25630 case MEM:
25631 if (rtx_equal_p (addr, insn))
25632 return true;
25633 break;
25634 case REG:
25635 CASE_CONST_ANY:
25636 case SYMBOL_REF:
25637 case CODE_LABEL:
25638 case PC:
25639 case CC0:
25640 case EXPR_LIST:
25641 return false;
25642 default:
25643 break;
25646 format_ptr = GET_RTX_FORMAT (code);
25647 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25649 switch (*format_ptr++)
25651 case 'e':
25652 if (exact_dependency_1 (addr, XEXP (insn, i)))
25653 return true;
25654 break;
25655 case 'E':
25656 for (j = 0; j < XVECLEN (insn, i); j++)
25657 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25658 return true;
25659 break;
25662 return false;
25665 /* Return true if there exists an exact dependency between the store and
25666 the load, i.e. the same memory address is used in them. */
25667 static bool
25668 exact_store_load_dependency (rtx store, rtx load)
25670 rtx set1, set2;
25672 set1 = single_set (store);
25673 if (!set1)
25674 return false;
25675 if (!MEM_P (SET_DEST (set1)))
25676 return false;
25677 set2 = single_set (load);
25678 if (!set2)
25679 return false;
25680 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25681 return true;
25682 return false;
25685 static int
25686 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25688 enum attr_type insn_type, dep_insn_type;
25689 enum attr_memory memory;
25690 rtx set, set2;
25691 int dep_insn_code_number;
25693 /* Anti and output dependencies have zero cost on all CPUs. */
25694 if (REG_NOTE_KIND (link) != 0)
25695 return 0;
25697 dep_insn_code_number = recog_memoized (dep_insn);
25699 /* If we can't recognize the insns, we can't really do anything. */
25700 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25701 return cost;
25703 insn_type = get_attr_type (insn);
25704 dep_insn_type = get_attr_type (dep_insn);
25706 switch (ix86_tune)
25708 case PROCESSOR_PENTIUM:
25709 /* Address Generation Interlock adds a cycle of latency. */
25710 if (insn_type == TYPE_LEA)
25712 rtx addr = PATTERN (insn);
25714 if (GET_CODE (addr) == PARALLEL)
25715 addr = XVECEXP (addr, 0, 0);
25717 gcc_assert (GET_CODE (addr) == SET);
25719 addr = SET_SRC (addr);
25720 if (modified_in_p (addr, dep_insn))
25721 cost += 1;
25723 else if (ix86_agi_dependent (dep_insn, insn))
25724 cost += 1;
25726 /* ??? Compares pair with jump/setcc. */
25727 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25728 cost = 0;
25730 /* Floating point stores require value to be ready one cycle earlier. */
25731 if (insn_type == TYPE_FMOV
25732 && get_attr_memory (insn) == MEMORY_STORE
25733 && !ix86_agi_dependent (dep_insn, insn))
25734 cost += 1;
25735 break;
25737 case PROCESSOR_PENTIUMPRO:
25738 /* INT->FP conversion is expensive. */
25739 if (get_attr_fp_int_src (dep_insn))
25740 cost += 5;
25742 /* There is one cycle extra latency between an FP op and a store. */
25743 if (insn_type == TYPE_FMOV
25744 && (set = single_set (dep_insn)) != NULL_RTX
25745 && (set2 = single_set (insn)) != NULL_RTX
25746 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25747 && MEM_P (SET_DEST (set2)))
25748 cost += 1;
25750 memory = get_attr_memory (insn);
25752 /* Show the ability of the reorder buffer to hide the latency of a load by
25753 executing it in parallel with the previous instruction when that
25754 previous instruction is not needed to compute the address. */
25755 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25756 && !ix86_agi_dependent (dep_insn, insn))
25758 /* Claim that moves take one cycle, as the core can issue one load
25759 at a time and the next load can start a cycle later. */
25760 if (dep_insn_type == TYPE_IMOV
25761 || dep_insn_type == TYPE_FMOV)
25762 cost = 1;
25763 else if (cost > 1)
25764 cost--;
25766 break;
25768 case PROCESSOR_K6:
25769 /* The esp dependency is resolved before
25770 the instruction is really finished. */
25771 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25772 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25773 return 1;
25775 /* INT->FP conversion is expensive. */
25776 if (get_attr_fp_int_src (dep_insn))
25777 cost += 5;
25779 memory = get_attr_memory (insn);
25781 /* Show the ability of the reorder buffer to hide the latency of a load by
25782 executing it in parallel with the previous instruction when that
25783 previous instruction is not needed to compute the address. */
25784 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25785 && !ix86_agi_dependent (dep_insn, insn))
25787 /* Claim that moves take one cycle, as the core can issue one load
25788 at a time and the next load can start a cycle later. */
25789 if (dep_insn_type == TYPE_IMOV
25790 || dep_insn_type == TYPE_FMOV)
25791 cost = 1;
25792 else if (cost > 2)
25793 cost -= 2;
25794 else
25795 cost = 1;
25797 break;
25799 case PROCESSOR_AMDFAM10:
25800 case PROCESSOR_BDVER1:
25801 case PROCESSOR_BDVER2:
25802 case PROCESSOR_BDVER3:
25803 case PROCESSOR_BDVER4:
25804 case PROCESSOR_BTVER1:
25805 case PROCESSOR_BTVER2:
25806 case PROCESSOR_GENERIC:
25807 /* The stack engine allows push&pop instructions to execute in parallel. */
25808 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25809 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25810 return 0;
25811 /* FALLTHRU */
25813 case PROCESSOR_ATHLON:
25814 case PROCESSOR_K8:
25815 memory = get_attr_memory (insn);
25817 /* Show the ability of the reorder buffer to hide the latency of a load by
25818 executing it in parallel with the previous instruction when that
25819 previous instruction is not needed to compute the address. */
25820 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25821 && !ix86_agi_dependent (dep_insn, insn))
25823 enum attr_unit unit = get_attr_unit (insn);
25824 int loadcost = 3;
25826 /* Because of the difference between the lengths of the integer and
25827 floating unit pipeline preparation stages, the memory operands
25828 for floating point are cheaper.
25830 ??? For Athlon the difference is most probably 2. */
25831 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25832 loadcost = 3;
25833 else
25834 loadcost = TARGET_ATHLON ? 2 : 0;
25836 if (cost >= loadcost)
25837 cost -= loadcost;
25838 else
25839 cost = 0;
25841 break;
25843 case PROCESSOR_CORE2:
25844 case PROCESSOR_NEHALEM:
25845 case PROCESSOR_SANDYBRIDGE:
25846 case PROCESSOR_HASWELL:
25847 /* The stack engine allows push&pop instructions to execute in parallel. */
25848 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25849 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25850 return 0;
25852 memory = get_attr_memory (insn);
25854 /* Show the ability of the reorder buffer to hide the latency of a load by
25855 executing it in parallel with the previous instruction when that
25856 previous instruction is not needed to compute the address. */
25857 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25858 && !ix86_agi_dependent (dep_insn, insn))
25860 if (cost >= 4)
25861 cost -= 4;
25862 else
25863 cost = 0;
25865 break;
25867 case PROCESSOR_SILVERMONT:
25868 case PROCESSOR_INTEL:
25869 if (!reload_completed)
25870 return cost;
25872 /* Increase cost of integer loads. */
25873 memory = get_attr_memory (dep_insn);
25874 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25876 enum attr_unit unit = get_attr_unit (dep_insn);
25877 if (unit == UNIT_INTEGER && cost == 1)
25879 if (memory == MEMORY_LOAD)
25880 cost = 3;
25881 else
25883 /* Increase the cost of ld/st for short int types only,
25884 because of the store-forwarding issue. */
25885 rtx set = single_set (dep_insn);
25886 if (set && (GET_MODE (SET_DEST (set)) == QImode
25887 || GET_MODE (SET_DEST (set)) == HImode))
25889 /* Increase the cost of the store/load pair if an exact
25890 dependence exists and this insn is the load. */
25891 enum attr_memory insn_memory = get_attr_memory (insn);
25892 if (insn_memory == MEMORY_LOAD
25893 && exact_store_load_dependency (dep_insn, insn))
25894 cost = 3;
25900 default:
25901 break;
25904 return cost;
25907 /* How many alternative schedules to try. This should be as wide as the
25908 scheduling freedom in the DFA, but no wider. Making this value too
25909 large results in extra work for the scheduler. */
25911 static int
25912 ia32_multipass_dfa_lookahead (void)
25914 switch (ix86_tune)
25916 case PROCESSOR_PENTIUM:
25917 return 2;
25919 case PROCESSOR_PENTIUMPRO:
25920 case PROCESSOR_K6:
25921 return 1;
25923 case PROCESSOR_BDVER1:
25924 case PROCESSOR_BDVER2:
25925 case PROCESSOR_BDVER3:
25926 case PROCESSOR_BDVER4:
25927 /* We use lookahead value 4 for BD both before and after reload
25928 schedules. Plan is to have value 8 included for O3. */
25929 return 4;
25931 case PROCESSOR_CORE2:
25932 case PROCESSOR_NEHALEM:
25933 case PROCESSOR_SANDYBRIDGE:
25934 case PROCESSOR_HASWELL:
25935 case PROCESSOR_BONNELL:
25936 case PROCESSOR_SILVERMONT:
25937 case PROCESSOR_INTEL:
25938 /* Generally, we want haifa-sched:max_issue() to look ahead as far
25939 as the number of instructions that can be executed in a cycle, i.e.,
25940 issue_rate. I wonder why tuning for many CPUs does not do this. */
25941 if (reload_completed)
25942 return ix86_issue_rate ();
25943 /* Don't use lookahead for pre-reload schedule to save compile time. */
25944 return 0;
25946 default:
25947 return 0;
25951 /* Return true if target platform supports macro-fusion. */
25953 static bool
25954 ix86_macro_fusion_p ()
25956 return TARGET_FUSE_CMP_AND_BRANCH;
25959 /* Check whether the current microarchitecture supports macro fusion
25960 for the insn pair "CONDGEN + CONDJMP". Refer to
25961 the "Intel Architectures Optimization Reference Manual". */
25963 static bool
25964 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25966 rtx src, dest;
25967 rtx single_set = single_set (condgen);
25968 enum rtx_code ccode;
25969 rtx compare_set = NULL_RTX, test_if, cond;
25970 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25972 if (get_attr_type (condgen) != TYPE_TEST
25973 && get_attr_type (condgen) != TYPE_ICMP
25974 && get_attr_type (condgen) != TYPE_INCDEC
25975 && get_attr_type (condgen) != TYPE_ALU)
25976 return false;
25978 if (single_set == NULL_RTX
25979 && !TARGET_FUSE_ALU_AND_BRANCH)
25980 return false;
25982 if (single_set != NULL_RTX)
25983 compare_set = single_set;
25984 else
25986 int i;
25987 rtx pat = PATTERN (condgen);
25988 for (i = 0; i < XVECLEN (pat, 0); i++)
25989 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25991 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25992 if (GET_CODE (set_src) == COMPARE)
25993 compare_set = XVECEXP (pat, 0, i);
25994 else
25995 alu_set = XVECEXP (pat, 0, i);
25998 if (compare_set == NULL_RTX)
25999 return false;
26000 src = SET_SRC (compare_set);
26001 if (GET_CODE (src) != COMPARE)
26002 return false;
26004 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26005 supported. */
26006 if ((MEM_P (XEXP (src, 0))
26007 && CONST_INT_P (XEXP (src, 1)))
26008 || (MEM_P (XEXP (src, 1))
26009 && CONST_INT_P (XEXP (src, 0))))
26010 return false;
26012 /* No fusion for RIP-relative address. */
26013 if (MEM_P (XEXP (src, 0)))
26014 addr = XEXP (XEXP (src, 0), 0);
26015 else if (MEM_P (XEXP (src, 1)))
26016 addr = XEXP (XEXP (src, 1), 0);
26018 if (addr) {
26019 ix86_address parts;
26020 int ok = ix86_decompose_address (addr, &parts);
26021 gcc_assert (ok);
26023 if (rip_relative_addr_p (&parts))
26024 return false;
26027 test_if = SET_SRC (pc_set (condjmp));
26028 cond = XEXP (test_if, 0);
26029 ccode = GET_CODE (cond);
26030 /* Check whether the conditional jump uses the Sign or Overflow Flags. */
26031 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26032 && (ccode == GE
26033 || ccode == GT
26034 || ccode == LE
26035 || ccode == LT))
26036 return false;
26038 /* Return true for TYPE_TEST and TYPE_ICMP. */
26039 if (get_attr_type (condgen) == TYPE_TEST
26040 || get_attr_type (condgen) == TYPE_ICMP)
26041 return true;
26043 /* The following handles the macro-fusion case of alu + jmp. */
26044 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26045 return false;
26047 /* No fusion for alu op with memory destination operand. */
26048 dest = SET_DEST (alu_set);
26049 if (MEM_P (dest))
26050 return false;
26052 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26053 supported. */
26054 if (get_attr_type (condgen) == TYPE_INCDEC
26055 && (ccode == GEU
26056 || ccode == GTU
26057 || ccode == LEU
26058 || ccode == LTU))
26059 return false;
26061 return true;
26064 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26065 execution. It is applied if
26066 (1) an IMUL instruction is on the top of the list;
26067 (2) there is exactly one producer of an independent IMUL instruction in
26068 the ready list.
26069 Return the index of the IMUL producer if it was found and -1 otherwise. */
26070 static int
26071 do_reorder_for_imul (rtx *ready, int n_ready)
26073 rtx insn, set, insn1, insn2;
26074 sd_iterator_def sd_it;
26075 dep_t dep;
26076 int index = -1;
26077 int i;
26079 if (!TARGET_BONNELL)
26080 return index;
26082 /* Check that IMUL instruction is on the top of ready list. */
26083 insn = ready[n_ready - 1];
26084 set = single_set (insn);
26085 if (!set)
26086 return index;
26087 if (!(GET_CODE (SET_SRC (set)) == MULT
26088 && GET_MODE (SET_SRC (set)) == SImode))
26089 return index;
26091 /* Search for producer of independent IMUL instruction. */
26092 for (i = n_ready - 2; i >= 0; i--)
26094 insn = ready[i];
26095 if (!NONDEBUG_INSN_P (insn))
26096 continue;
26097 /* Skip IMUL instruction. */
26098 insn2 = PATTERN (insn);
26099 if (GET_CODE (insn2) == PARALLEL)
26100 insn2 = XVECEXP (insn2, 0, 0);
26101 if (GET_CODE (insn2) == SET
26102 && GET_CODE (SET_SRC (insn2)) == MULT
26103 && GET_MODE (SET_SRC (insn2)) == SImode)
26104 continue;
26106 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26108 rtx con;
26109 con = DEP_CON (dep);
26110 if (!NONDEBUG_INSN_P (con))
26111 continue;
26112 insn1 = PATTERN (con);
26113 if (GET_CODE (insn1) == PARALLEL)
26114 insn1 = XVECEXP (insn1, 0, 0);
26116 if (GET_CODE (insn1) == SET
26117 && GET_CODE (SET_SRC (insn1)) == MULT
26118 && GET_MODE (SET_SRC (insn1)) == SImode)
26120 sd_iterator_def sd_it1;
26121 dep_t dep1;
26122 /* Check that there is no other producer for the IMUL. */
26123 index = i;
26124 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26126 rtx pro;
26127 pro = DEP_PRO (dep1);
26128 if (!NONDEBUG_INSN_P (pro))
26129 continue;
26130 if (pro != insn)
26131 index = -1;
26133 if (index >= 0)
26134 break;
26137 if (index >= 0)
26138 break;
26140 return index;
26143 /* Try to find the best candidate for the top of the ready list if two insns
26144 have the same priority - the best candidate is the one whose producers were
26145 scheduled earlier. Applied for Silvermont only.
26146 Return true if the top 2 insns must be interchanged. */
26147 static bool
26148 swap_top_of_ready_list (rtx *ready, int n_ready)
26150 rtx top = ready[n_ready - 1];
26151 rtx next = ready[n_ready - 2];
26152 rtx set;
26153 sd_iterator_def sd_it;
26154 dep_t dep;
26155 int clock1 = -1;
26156 int clock2 = -1;
26157 #define INSN_TICK(INSN) (HID (INSN)->tick)
26159 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26160 return false;
26162 if (!NONDEBUG_INSN_P (top))
26163 return false;
26164 if (!NONJUMP_INSN_P (top))
26165 return false;
26166 if (!NONDEBUG_INSN_P (next))
26167 return false;
26168 if (!NONJUMP_INSN_P (next))
26169 return false;
26170 set = single_set (top);
26171 if (!set)
26172 return false;
26173 set = single_set (next);
26174 if (!set)
26175 return false;
26177 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26179 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26180 return false;
26181 /* Determine the winner more precisely. */
26182 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26184 rtx pro;
26185 pro = DEP_PRO (dep);
26186 if (!NONDEBUG_INSN_P (pro))
26187 continue;
26188 if (INSN_TICK (pro) > clock1)
26189 clock1 = INSN_TICK (pro);
26191 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26193 rtx pro;
26194 pro = DEP_PRO (dep);
26195 if (!NONDEBUG_INSN_P (pro))
26196 continue;
26197 if (INSN_TICK (pro) > clock2)
26198 clock2 = INSN_TICK (pro);
26201 if (clock1 == clock2)
26203 /* Determine winner - load must win. */
26204 enum attr_memory memory1, memory2;
26205 memory1 = get_attr_memory (top);
26206 memory2 = get_attr_memory (next);
26207 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26208 return true;
26210 return (bool) (clock2 < clock1);
26212 return false;
26213 #undef INSN_TICK
26216 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26217 Return the issue rate. */
26218 static int
26219 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26220 int clock_var)
26222 int issue_rate = -1;
26223 int n_ready = *pn_ready;
26224 int i;
26225 rtx insn;
26226 int index = -1;
26228 /* Set up issue rate. */
26229 issue_rate = ix86_issue_rate ();
26231 /* Do reordering for BONNELL/SILVERMONT only. */
26232 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26233 return issue_rate;
26235 /* Nothing to do if ready list contains only 1 instruction. */
26236 if (n_ready <= 1)
26237 return issue_rate;
26239 /* Do reordering for the post-reload scheduler only. */
26240 if (!reload_completed)
26241 return issue_rate;
26243 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26245 if (sched_verbose > 1)
26246 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26247 INSN_UID (ready[index]));
26249 /* Put IMUL producer (ready[index]) at the top of ready list. */
26250 insn = ready[index];
26251 for (i = index; i < n_ready - 1; i++)
26252 ready[i] = ready[i + 1];
26253 ready[n_ready - 1] = insn;
26254 return issue_rate;
26256 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26258 if (sched_verbose > 1)
26259 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26260 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26261 /* Swap 2 top elements of ready list. */
26262 insn = ready[n_ready - 1];
26263 ready[n_ready - 1] = ready[n_ready - 2];
26264 ready[n_ready - 2] = insn;
26266 return issue_rate;
26269 static bool
26270 ix86_class_likely_spilled_p (reg_class_t);
26272 /* Return true if the lhs of INSN is a HW function argument register, and set
26273 is_spilled to true if it is a likely-spilled HW register. */
26274 static bool
26275 insn_is_function_arg (rtx insn, bool* is_spilled)
26277 rtx dst;
26279 if (!NONDEBUG_INSN_P (insn))
26280 return false;
26281 /* Call instructions are not movable; ignore them. */
26282 if (CALL_P (insn))
26283 return false;
26284 insn = PATTERN (insn);
26285 if (GET_CODE (insn) == PARALLEL)
26286 insn = XVECEXP (insn, 0, 0);
26287 if (GET_CODE (insn) != SET)
26288 return false;
26289 dst = SET_DEST (insn);
26290 if (REG_P (dst) && HARD_REGISTER_P (dst)
26291 && ix86_function_arg_regno_p (REGNO (dst)))
26293 /* Is it likely spilled HW register? */
26294 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26295 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26296 *is_spilled = true;
26297 return true;
26299 return false;
26302 /* Add output dependencies for a chain of adjacent function arguments, but only
26303 if there is a move to a likely-spilled HW register. Return the first argument
26304 if at least one dependence was added, or NULL otherwise. */
26305 static rtx
26306 add_parameter_dependencies (rtx call, rtx head)
26308 rtx insn;
26309 rtx last = call;
26310 rtx first_arg = NULL;
26311 bool is_spilled = false;
26313 head = PREV_INSN (head);
26315 /* Find the argument-passing instruction nearest to the call. */
26316 while (true)
26318 last = PREV_INSN (last);
26319 if (last == head)
26320 return NULL;
26321 if (!NONDEBUG_INSN_P (last))
26322 continue;
26323 if (insn_is_function_arg (last, &is_spilled))
26324 break;
26325 return NULL;
26328 first_arg = last;
26329 while (true)
26331 insn = PREV_INSN (last);
26332 if (!INSN_P (insn))
26333 break;
26334 if (insn == head)
26335 break;
26336 if (!NONDEBUG_INSN_P (insn))
26338 last = insn;
26339 continue;
26341 if (insn_is_function_arg (insn, &is_spilled))
26343 /* Add an output dependence between two function arguments if the chain
26344 of output arguments contains likely-spilled HW registers. */
26345 if (is_spilled)
26346 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26347 first_arg = last = insn;
26349 else
26350 break;
26352 if (!is_spilled)
26353 return NULL;
26354 return first_arg;
26357 /* Add output or anti dependency from insn to first_arg to restrict its code
26358 motion. */
26359 static void
26360 avoid_func_arg_motion (rtx first_arg, rtx insn)
26362 rtx set;
26363 rtx tmp;
26365 set = single_set (insn);
26366 if (!set)
26367 return;
26368 tmp = SET_DEST (set);
26369 if (REG_P (tmp))
26371 /* Add output dependency to the first function argument. */
26372 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26373 return;
26375 /* Add anti dependency. */
26376 add_dependence (first_arg, insn, REG_DEP_ANTI);
26379 /* Avoid cross-block motion of a function argument by adding a dependency
26380 from the first non-jump instruction in BB. */
26381 static void
26382 add_dependee_for_func_arg (rtx arg, basic_block bb)
26384 rtx insn = BB_END (bb);
26386 while (insn)
26388 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26390 rtx set = single_set (insn);
26391 if (set)
26393 avoid_func_arg_motion (arg, insn);
26394 return;
26397 if (insn == BB_HEAD (bb))
26398 return;
26399 insn = PREV_INSN (insn);
26403 /* Hook for pre-reload schedule - avoid motion of function arguments
26404 passed in likely spilled HW registers. */
26405 static void
26406 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26408 rtx insn;
26409 rtx first_arg = NULL;
26410 if (reload_completed)
26411 return;
26412 while (head != tail && DEBUG_INSN_P (head))
26413 head = NEXT_INSN (head);
26414 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26415 if (INSN_P (insn) && CALL_P (insn))
26417 first_arg = add_parameter_dependencies (insn, head);
26418 if (first_arg)
26420 /* Add a dependee for the first argument to predecessors, but only
26421 if the region contains more than one block. */
26422 basic_block bb = BLOCK_FOR_INSN (insn);
26423 int rgn = CONTAINING_RGN (bb->index);
26424 int nr_blks = RGN_NR_BLOCKS (rgn);
26425 /* Skip trivial regions and region head blocks that can have
26426 predecessors outside of region. */
26427 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26429 edge e;
26430 edge_iterator ei;
26432 /* Regions are SCCs with the exception of selective
26433 scheduling with pipelining of outer blocks enabled.
26434 So also check that immediate predecessors of a non-head
26435 block are in the same region. */
26436 FOR_EACH_EDGE (e, ei, bb->preds)
26438 /* Avoid creating loop-carried dependencies by
26439 using the topological ordering in the region. */
26440 if (rgn == CONTAINING_RGN (e->src->index)
26441 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26442 add_dependee_for_func_arg (first_arg, e->src);
26445 insn = first_arg;
26446 if (insn == head)
26447 break;
26450 else if (first_arg)
26451 avoid_func_arg_motion (first_arg, insn);
26454 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26455 HW registers to maximum, to schedule them as soon as possible. These are
26456 moves from function argument registers at the top of the function entry
26457 and moves from function return value registers after a call. */
26458 static int
26459 ix86_adjust_priority (rtx insn, int priority)
26461 rtx set;
26463 if (reload_completed)
26464 return priority;
26466 if (!NONDEBUG_INSN_P (insn))
26467 return priority;
26469 set = single_set (insn);
26470 if (set)
26472 rtx tmp = SET_SRC (set);
26473 if (REG_P (tmp)
26474 && HARD_REGISTER_P (tmp)
26475 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26476 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26477 return current_sched_info->sched_max_insns_priority;
26480 return priority;
26483 /* Model decoder of Core 2/i7.
26484 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26485 track the instruction fetch block boundaries and make sure that long
26486 (9+ bytes) instructions are assigned to D0. */
26488 /* Maximum length of an insn that can be handled by
26489 a secondary decoder unit. '8' for Core 2/i7. */
26490 static int core2i7_secondary_decoder_max_insn_size;
26492 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26493 '16' for Core 2/i7. */
26494 static int core2i7_ifetch_block_size;
26496 /* Maximum number of instructions decoder can handle per cycle.
26497 '6' for Core 2/i7. */
26498 static int core2i7_ifetch_block_max_insns;
26500 typedef struct ix86_first_cycle_multipass_data_ *
26501 ix86_first_cycle_multipass_data_t;
26502 typedef const struct ix86_first_cycle_multipass_data_ *
26503 const_ix86_first_cycle_multipass_data_t;
26505 /* A variable to store target state across calls to max_issue within
26506 one cycle. */
26507 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26508 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26510 /* Initialize DATA. */
26511 static void
26512 core2i7_first_cycle_multipass_init (void *_data)
26514 ix86_first_cycle_multipass_data_t data
26515 = (ix86_first_cycle_multipass_data_t) _data;
26517 data->ifetch_block_len = 0;
26518 data->ifetch_block_n_insns = 0;
26519 data->ready_try_change = NULL;
26520 data->ready_try_change_size = 0;
26523 /* Advancing the cycle; reset ifetch block counts. */
26524 static void
26525 core2i7_dfa_post_advance_cycle (void)
26527 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26529 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26531 data->ifetch_block_len = 0;
26532 data->ifetch_block_n_insns = 0;
26535 static int min_insn_size (rtx);
26537 /* Filter out insns from ready_try that the core will not be able to issue
26538 on the current cycle due to decoder restrictions. */
26539 static void
26540 core2i7_first_cycle_multipass_filter_ready_try
26541 (const_ix86_first_cycle_multipass_data_t data,
26542 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26544 while (n_ready--)
26546 rtx insn;
26547 int insn_size;
26549 if (ready_try[n_ready])
26550 continue;
26552 insn = get_ready_element (n_ready);
26553 insn_size = min_insn_size (insn);
26555 if (/* If this insn is too long for a secondary decoder ... */
26556 (!first_cycle_insn_p
26557 && insn_size > core2i7_secondary_decoder_max_insn_size)
26558 /* ... or it would not fit into the ifetch block ... */
26559 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26560 /* ... or the decoder is full already ... */
26561 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26562 /* ... mask the insn out. */
26564 ready_try[n_ready] = 1;
26566 if (data->ready_try_change)
26567 bitmap_set_bit (data->ready_try_change, n_ready);
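/* An illustrative stand-alone sketch of the decoder constraints enforced by
   the filter above, assuming the Core 2/i7 parameters described earlier
   (8-byte secondary decoder limit, 16-byte ifetch block, at most 6 insns
   decoded per cycle).  The names below are hypothetical and not used
   elsewhere in this file.  */

#include <stdbool.h>

struct example_decoder_state
{
  int ifetch_block_len;      /* Bytes already consumed in this cycle.  */
  int ifetch_block_n_insns;  /* Insns already decoded in this cycle.  */
};

/* Return true if an insn of INSN_SIZE bytes could still be issued on the
   current cycle under the modelled decoder restrictions.  */
static bool
example_decoder_can_issue_p (const struct example_decoder_state *d,
                             int insn_size, bool first_cycle_insn_p)
{
  if (!first_cycle_insn_p && insn_size > 8)   /* only D0 takes long insns */
    return false;
  if (d->ifetch_block_len + insn_size > 16)   /* would overflow ifetch block */
    return false;
  if (d->ifetch_block_n_insns + 1 > 6)        /* decoders already full */
    return false;
  return true;
}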
26572 /* Prepare for a new round of multipass lookahead scheduling. */
26573 static void
26574 core2i7_first_cycle_multipass_begin (void *_data,
26575 signed char *ready_try, int n_ready,
26576 bool first_cycle_insn_p)
26578 ix86_first_cycle_multipass_data_t data
26579 = (ix86_first_cycle_multipass_data_t) _data;
26580 const_ix86_first_cycle_multipass_data_t prev_data
26581 = ix86_first_cycle_multipass_data;
26583 /* Restore the state from the end of the previous round. */
26584 data->ifetch_block_len = prev_data->ifetch_block_len;
26585 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26587 /* Filter instructions that cannot be issued on the current cycle due to
26588 decoder restrictions. */
26589 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26590 first_cycle_insn_p);
26593 /* INSN is being issued in current solution. Account for its impact on
26594 the decoder model. */
26595 static void
26596 core2i7_first_cycle_multipass_issue (void *_data,
26597 signed char *ready_try, int n_ready,
26598 rtx insn, const void *_prev_data)
26600 ix86_first_cycle_multipass_data_t data
26601 = (ix86_first_cycle_multipass_data_t) _data;
26602 const_ix86_first_cycle_multipass_data_t prev_data
26603 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26605 int insn_size = min_insn_size (insn);
26607 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26608 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26609 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26610 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26612 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26613 if (!data->ready_try_change)
26615 data->ready_try_change = sbitmap_alloc (n_ready);
26616 data->ready_try_change_size = n_ready;
26618 else if (data->ready_try_change_size < n_ready)
26620 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26621 n_ready, 0);
26622 data->ready_try_change_size = n_ready;
26624 bitmap_clear (data->ready_try_change);
26626 /* Filter out insns from ready_try that the core will not be able to issue
26627 on the current cycle due to decoder restrictions. */
26628 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26629 false);
26632 /* Revert the effect on ready_try. */
26633 static void
26634 core2i7_first_cycle_multipass_backtrack (const void *_data,
26635 signed char *ready_try,
26636 int n_ready ATTRIBUTE_UNUSED)
26638 const_ix86_first_cycle_multipass_data_t data
26639 = (const_ix86_first_cycle_multipass_data_t) _data;
26640 unsigned int i = 0;
26641 sbitmap_iterator sbi;
26643 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26644 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26646 ready_try[i] = 0;
26650 /* Save the result of multipass lookahead scheduling for the next round. */
26651 static void
26652 core2i7_first_cycle_multipass_end (const void *_data)
26654 const_ix86_first_cycle_multipass_data_t data
26655 = (const_ix86_first_cycle_multipass_data_t) _data;
26656 ix86_first_cycle_multipass_data_t next_data
26657 = ix86_first_cycle_multipass_data;
26659 if (data != NULL)
26661 next_data->ifetch_block_len = data->ifetch_block_len;
26662 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26666 /* Deallocate target data. */
26667 static void
26668 core2i7_first_cycle_multipass_fini (void *_data)
26670 ix86_first_cycle_multipass_data_t data
26671 = (ix86_first_cycle_multipass_data_t) _data;
26673 if (data->ready_try_change)
26675 sbitmap_free (data->ready_try_change);
26676 data->ready_try_change = NULL;
26677 data->ready_try_change_size = 0;
26681 /* Prepare for scheduling pass. */
26682 static void
26683 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26684 int verbose ATTRIBUTE_UNUSED,
26685 int max_uid ATTRIBUTE_UNUSED)
26687 /* Install scheduling hooks for current CPU. Some of these hooks are used
26688 in time-critical parts of the scheduler, so we only set them up when
26689 they are actually used. */
26690 switch (ix86_tune)
26692 case PROCESSOR_CORE2:
26693 case PROCESSOR_NEHALEM:
26694 case PROCESSOR_SANDYBRIDGE:
26695 case PROCESSOR_HASWELL:
26696 /* Do not perform multipass scheduling for pre-reload schedule
26697 to save compile time. */
26698 if (reload_completed)
26700 targetm.sched.dfa_post_advance_cycle
26701 = core2i7_dfa_post_advance_cycle;
26702 targetm.sched.first_cycle_multipass_init
26703 = core2i7_first_cycle_multipass_init;
26704 targetm.sched.first_cycle_multipass_begin
26705 = core2i7_first_cycle_multipass_begin;
26706 targetm.sched.first_cycle_multipass_issue
26707 = core2i7_first_cycle_multipass_issue;
26708 targetm.sched.first_cycle_multipass_backtrack
26709 = core2i7_first_cycle_multipass_backtrack;
26710 targetm.sched.first_cycle_multipass_end
26711 = core2i7_first_cycle_multipass_end;
26712 targetm.sched.first_cycle_multipass_fini
26713 = core2i7_first_cycle_multipass_fini;
26715 /* Set decoder parameters. */
26716 core2i7_secondary_decoder_max_insn_size = 8;
26717 core2i7_ifetch_block_size = 16;
26718 core2i7_ifetch_block_max_insns = 6;
26719 break;
26721 /* ... Fall through ... */
26722 default:
26723 targetm.sched.dfa_post_advance_cycle = NULL;
26724 targetm.sched.first_cycle_multipass_init = NULL;
26725 targetm.sched.first_cycle_multipass_begin = NULL;
26726 targetm.sched.first_cycle_multipass_issue = NULL;
26727 targetm.sched.first_cycle_multipass_backtrack = NULL;
26728 targetm.sched.first_cycle_multipass_end = NULL;
26729 targetm.sched.first_cycle_multipass_fini = NULL;
26730 break;
26735 /* Compute the alignment given to a constant that is being placed in memory.
26736 EXP is the constant and ALIGN is the alignment that the object would
26737 ordinarily have.
26738 The value of this function is used instead of that alignment to align
26739 the object. */
26742 ix86_constant_alignment (tree exp, int align)
26744 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26745 || TREE_CODE (exp) == INTEGER_CST)
26747 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26748 return 64;
26749 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26750 return 128;
26752 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26753 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26754 return BITS_PER_WORD;
26756 return align;
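/* An illustrative stand-alone sketch of two of the cases handled above,
   assuming the usual x86 mode sizes: an 8-byte floating-point constant is
   given 64-bit alignment, and a 128-bit vector constant is given 128-bit
   alignment.  The function below is a hypothetical restatement, not the
   real hook.  */

static int
example_constant_alignment (int constant_bits, int is_vector, int align)
{
  if (!is_vector && constant_bits == 64 && align < 64)
    return 64;                  /* DFmode constants want 8-byte alignment */
  if (is_vector && constant_bits == 128 && align < 128)
    return 128;                 /* 128-bit SSE constants want 16 bytes */
  return align;
}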
26759 /* Compute the alignment for a static variable.
26760 TYPE is the data type, and ALIGN is the alignment that
26761 the object would ordinarily have. The value of this function is used
26762 instead of that alignment to align the object. */
26765 ix86_data_alignment (tree type, int align, bool opt)
26767 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26768 for symbols from other compilation units or symbols that don't need
26769 to bind locally. In order to preserve some ABI compatibility with
26770 those compilers, ensure we don't decrease alignment from what we
26771 used to assume. */
26773 int max_align_compat
26774 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26776 /* A data structure whose size is equal to or greater than that of a cache
26777 line (64 bytes in the Pentium 4 and other recent Intel processors,
26778 including processors based on the Intel Core microarchitecture) should be
26779 aligned so that its base address is a multiple of the cache line size. */
26781 int max_align
26782 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26784 if (max_align < BITS_PER_WORD)
26785 max_align = BITS_PER_WORD;
26787 if (opt
26788 && AGGREGATE_TYPE_P (type)
26789 && TYPE_SIZE (type)
26790 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26792 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26793 && align < max_align_compat)
26794 align = max_align_compat;
26795 if (wi::geu_p (TYPE_SIZE (type), max_align)
26796 && align < max_align)
26797 align = max_align;
26800 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26801 to a 16-byte boundary. */
26802 if (TARGET_64BIT)
26804 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26805 && TYPE_SIZE (type)
26806 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26807 && wi::geu_p (TYPE_SIZE (type), 128)
26808 && align < 128)
26809 return 128;
26812 if (!opt)
26813 return align;
26815 if (TREE_CODE (type) == ARRAY_TYPE)
26817 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26818 return 64;
26819 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26820 return 128;
26822 else if (TREE_CODE (type) == COMPLEX_TYPE)
26825 if (TYPE_MODE (type) == DCmode && align < 64)
26826 return 64;
26827 if ((TYPE_MODE (type) == XCmode
26828 || TYPE_MODE (type) == TCmode) && align < 128)
26829 return 128;
26831 else if ((TREE_CODE (type) == RECORD_TYPE
26832 || TREE_CODE (type) == UNION_TYPE
26833 || TREE_CODE (type) == QUAL_UNION_TYPE)
26834 && TYPE_FIELDS (type))
26836 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26837 return 64;
26838 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26839 return 128;
26841 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26842 || TREE_CODE (type) == INTEGER_TYPE)
26844 if (TYPE_MODE (type) == DFmode && align < 64)
26845 return 64;
26846 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26847 return 128;
26850 return align;
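/* An illustrative stand-alone sketch of how the caps above combine for an
   aggregate on x86-64, assuming a 64-byte prefetch block; the function and
   constants below are hypothetical restatements, not the real hook.  For
   example, a 64-byte (512-bit) static struct ends up cache-line aligned,
   while a 16-byte array still gets the ABI-mandated 16-byte alignment.  */

static int
example_data_alignment (unsigned int size_in_bits, int align)
{
  const int max_align_compat = 256;   /* GCC 4.8 compatibility cap, in bits */
  const int max_align = 64 * 8;       /* one cache line, in bits */

  if (size_in_bits >= (unsigned int) max_align_compat
      && align < max_align_compat)
    align = max_align_compat;
  if (size_in_bits >= (unsigned int) max_align && align < max_align)
    align = max_align;
  /* x86-64 ABI: arrays of 16 bytes or larger are 16-byte aligned.  */
  if (size_in_bits >= 128 && align < 128)
    align = 128;
  return align;
}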
26853 /* Compute the alignment for a local variable or a stack slot. EXP is
26854 the data type or decl itself, MODE is the widest mode available and
26855 ALIGN is the alignment that the object would ordinarily have. The
26856 value of this macro is used instead of that alignment to align the
26857 object. */
26859 unsigned int
26860 ix86_local_alignment (tree exp, enum machine_mode mode,
26861 unsigned int align)
26863 tree type, decl;
26865 if (exp && DECL_P (exp))
26867 type = TREE_TYPE (exp);
26868 decl = exp;
26870 else
26872 type = exp;
26873 decl = NULL;
26876 /* Don't do dynamic stack realignment for long long objects with
26877 -mpreferred-stack-boundary=2. */
26878 if (!TARGET_64BIT
26879 && align == 64
26880 && ix86_preferred_stack_boundary < 64
26881 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26882 && (!type || !TYPE_USER_ALIGN (type))
26883 && (!decl || !DECL_USER_ALIGN (decl)))
26884 align = 32;
26886 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26887 register in MODE. We will return the larger of the XF and DF
26888 alignments. */
26889 if (!type)
26891 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26892 align = GET_MODE_ALIGNMENT (DFmode);
26893 return align;
26896 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26897 to a 16-byte boundary. The exact wording is:
26899 An array uses the same alignment as its elements, except that a local or
26900 global array variable of length at least 16 bytes or
26901 a C99 variable-length array variable always has alignment of at least 16 bytes.
26903 This was added to allow use of aligned SSE instructions on arrays. The
26904 rule is meant for static storage (where the compiler cannot do the
26905 analysis by itself). We follow it for automatic variables only when
26906 convenient: we fully control everything in the function being compiled,
26907 and functions from other units cannot rely on the alignment.
26909 Exclude the va_list type. It is a common case of a local array where
26910 we cannot benefit from the alignment.
26912 TODO: Probably one should optimize for size only when var is not escaping. */
26913 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26914 && TARGET_SSE)
26916 if (AGGREGATE_TYPE_P (type)
26917 && (va_list_type_node == NULL_TREE
26918 || (TYPE_MAIN_VARIANT (type)
26919 != TYPE_MAIN_VARIANT (va_list_type_node)))
26920 && TYPE_SIZE (type)
26921 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26922 && wi::geu_p (TYPE_SIZE (type), 16)
26923 && align < 128)
26924 return 128;
26926 if (TREE_CODE (type) == ARRAY_TYPE)
26928 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26929 return 64;
26930 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26931 return 128;
26933 else if (TREE_CODE (type) == COMPLEX_TYPE)
26935 if (TYPE_MODE (type) == DCmode && align < 64)
26936 return 64;
26937 if ((TYPE_MODE (type) == XCmode
26938 || TYPE_MODE (type) == TCmode) && align < 128)
26939 return 128;
26941 else if ((TREE_CODE (type) == RECORD_TYPE
26942 || TREE_CODE (type) == UNION_TYPE
26943 || TREE_CODE (type) == QUAL_UNION_TYPE)
26944 && TYPE_FIELDS (type))
26946 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26947 return 64;
26948 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26949 return 128;
26951 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26952 || TREE_CODE (type) == INTEGER_TYPE)
26955 if (TYPE_MODE (type) == DFmode && align < 64)
26956 return 64;
26957 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26958 return 128;
26960 return align;
26963 /* Compute the minimum required alignment for dynamic stack realignment
26964 purposes for a local variable, parameter or a stack slot. EXP is
26965 the data type or decl itself, MODE is its mode and ALIGN is the
26966 alignment that the object would ordinarily have. */
26968 unsigned int
26969 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26970 unsigned int align)
26972 tree type, decl;
26974 if (exp && DECL_P (exp))
26976 type = TREE_TYPE (exp);
26977 decl = exp;
26979 else
26981 type = exp;
26982 decl = NULL;
26985 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26986 return align;
26988 /* Don't do dynamic stack realignment for long long objects with
26989 -mpreferred-stack-boundary=2. */
26990 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26991 && (!type || !TYPE_USER_ALIGN (type))
26992 && (!decl || !DECL_USER_ALIGN (decl)))
26993 return 32;
26995 return align;
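/* An illustrative stand-alone restatement of the decision above: with
   -m32 -mpreferred-stack-boundary=2, a long long (DImode) local only
   requires 32-bit alignment unless the user asked for more, so it does not
   force dynamic stack realignment.  The names below are hypothetical.  */

static unsigned int
example_minimum_alignment (int target_64bit, unsigned int align,
                           unsigned int preferred_stack_boundary,
                           int is_dimode, int user_aligned)
{
  if (target_64bit || align != 64 || preferred_stack_boundary >= 64)
    return align;
  if (is_dimode && !user_aligned)
    return 32;                  /* no dynamic realignment needed */
  return align;
}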
26998 /* Find a location for the static chain incoming to a nested function.
26999 This is a register, unless all free registers are used by arguments. */
27001 static rtx
27002 ix86_static_chain (const_tree fndecl, bool incoming_p)
27004 unsigned regno;
27006 if (!DECL_STATIC_CHAIN (fndecl))
27007 return NULL;
27009 if (TARGET_64BIT)
27011 /* We always use R10 in 64-bit mode. */
27012 regno = R10_REG;
27014 else
27016 tree fntype;
27017 unsigned int ccvt;
27019 /* By default in 32-bit mode we use ECX to pass the static chain. */
27020 regno = CX_REG;
27022 fntype = TREE_TYPE (fndecl);
27023 ccvt = ix86_get_callcvt (fntype);
27024 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27026 /* Fastcall functions use ecx/edx for arguments, which leaves
27027 us with EAX for the static chain.
27028 Thiscall functions use ecx for arguments, which also
27029 leaves us with EAX for the static chain. */
27030 regno = AX_REG;
27032 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27034 /* Thiscall functions use ecx for arguments, which leaves
27035 us with EAX and EDX for the static chain.
27036 For ABI compatibility we use EAX. */
27037 regno = AX_REG;
27039 else if (ix86_function_regparm (fntype, fndecl) == 3)
27041 /* For regparm 3, we have no free call-clobbered registers in
27042 which to store the static chain. In order to implement this,
27043 we have the trampoline push the static chain to the stack.
27044 However, we can't push a value below the return address when
27045 we call the nested function directly, so we have to use an
27046 alternate entry point. For this we use ESI, and have the
27047 alternate entry point push ESI, so that things appear the
27048 same once we're executing the nested function. */
27049 if (incoming_p)
27051 if (fndecl == current_function_decl)
27052 ix86_static_chain_on_stack = true;
27053 return gen_frame_mem (SImode,
27054 plus_constant (Pmode,
27055 arg_pointer_rtx, -8));
27057 regno = SI_REG;
27061 return gen_rtx_REG (Pmode, regno);
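/* An illustrative stand-alone summary of the register choices made above;
   the enum and function are hypothetical.  "stack" stands for the regparm-3
   case, where the trampoline pushes the chain and an ESI-based alternate
   entry point is used instead of a register.  */

enum example_callcvt
{
  EXAMPLE_CDECL,
  EXAMPLE_FASTCALL,
  EXAMPLE_THISCALL,
  EXAMPLE_REGPARM3
};

static const char *
example_static_chain_location (int target_64bit, enum example_callcvt cvt)
{
  if (target_64bit)
    return "r10";               /* always R10 in 64-bit mode */
  switch (cvt)
    {
    case EXAMPLE_FASTCALL:
    case EXAMPLE_THISCALL:
      return "eax";             /* ecx (and edx) already carry arguments */
    case EXAMPLE_REGPARM3:
      return "stack";           /* no free call-clobbered register */
    default:
      return "ecx";             /* default 32-bit static chain */
    }
}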
27064 /* Emit RTL insns to initialize the variable parts of a trampoline.
27065 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27066 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27067 to be passed to the target function. */
27069 static void
27070 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27072 rtx mem, fnaddr;
27073 int opcode;
27074 int offset = 0;
27076 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27078 if (TARGET_64BIT)
27080 int size;
27082 /* Load the function address into r11. Try to load the address using
27083 the shorter movl instead of movabs. We may want to support
27084 movq for kernel mode, but the kernel does not use trampolines at
27085 the moment. FNADDR is a 32-bit address and may not be in
27086 DImode when ptr_mode == SImode. Always use movl in this
27087 case. */
27088 if (ptr_mode == SImode
27089 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27091 fnaddr = copy_addr_to_reg (fnaddr);
27093 mem = adjust_address (m_tramp, HImode, offset);
27094 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27096 mem = adjust_address (m_tramp, SImode, offset + 2);
27097 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27098 offset += 6;
27100 else
27102 mem = adjust_address (m_tramp, HImode, offset);
27103 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27105 mem = adjust_address (m_tramp, DImode, offset + 2);
27106 emit_move_insn (mem, fnaddr);
27107 offset += 10;
27110 /* Load the static chain into r10 using movabs. Use the shorter movl
27111 instead of movabs when ptr_mode == SImode. */
27112 if (ptr_mode == SImode)
27114 opcode = 0xba41;
27115 size = 6;
27117 else
27119 opcode = 0xba49;
27120 size = 10;
27123 mem = adjust_address (m_tramp, HImode, offset);
27124 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27126 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27127 emit_move_insn (mem, chain_value);
27128 offset += size;
27130 /* Jump to r11; the last (unused) byte is a nop, only there to
27131 pad the write out to a single 32-bit store. */
27132 mem = adjust_address (m_tramp, SImode, offset);
27133 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27134 offset += 4;
27136 else
27138 rtx disp, chain;
27140 /* Depending on the static chain location, either load a register
27141 with a constant, or push the constant to the stack. All of the
27142 instructions are the same size. */
27143 chain = ix86_static_chain (fndecl, true);
27144 if (REG_P (chain))
27146 switch (REGNO (chain))
27148 case AX_REG:
27149 opcode = 0xb8; break;
27150 case CX_REG:
27151 opcode = 0xb9; break;
27152 default:
27153 gcc_unreachable ();
27156 else
27157 opcode = 0x68;
27159 mem = adjust_address (m_tramp, QImode, offset);
27160 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27162 mem = adjust_address (m_tramp, SImode, offset + 1);
27163 emit_move_insn (mem, chain_value);
27164 offset += 5;
27166 mem = adjust_address (m_tramp, QImode, offset);
27167 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27169 mem = adjust_address (m_tramp, SImode, offset + 1);
27171 /* Compute the offset from the end of the jmp to the target function.
27172 When the trampoline stores the static chain on the stack, we need
27173 to skip the first insn of the target, which pushes the (call-saved)
27174 register used for the static chain; this push is 1 byte. */
27175 offset += 5;
27176 disp = expand_binop (SImode, sub_optab, fnaddr,
27177 plus_constant (Pmode, XEXP (m_tramp, 0),
27178 offset - (MEM_P (chain) ? 1 : 0)),
27179 NULL_RTX, 1, OPTAB_DIRECT);
27180 emit_move_insn (mem, disp);
27183 gcc_assert (offset <= TRAMPOLINE_SIZE);
27185 #ifdef HAVE_ENABLE_EXECUTE_STACK
27186 #ifdef CHECK_EXECUTE_STACK_ENABLED
27187 if (CHECK_EXECUTE_STACK_ENABLED)
27188 #endif
27189 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27190 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27191 #endif
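/* An illustrative stand-alone sketch of the byte image the code above emits
   for the 64-bit movabs case: movabs $fnaddr, %r11; movabs $chain, %r10;
   jmp *%r11; nop -- 24 bytes in total.  The helper below is hypothetical
   and assumes a little-endian host, as on x86.  */

#include <stdint.h>
#include <string.h>

static void
example_build_tramp64 (uint8_t buf[24], uint64_t fnaddr, uint64_t chain)
{
  buf[0] = 0x49; buf[1] = 0xbb;         /* movabs $fnaddr, %r11 */
  memcpy (buf + 2, &fnaddr, 8);
  buf[10] = 0x49; buf[11] = 0xba;       /* movabs $chain, %r10 */
  memcpy (buf + 12, &chain, 8);
  buf[20] = 0x49; buf[21] = 0xff;       /* jmp *%r11 */
  buf[22] = 0xe3;
  buf[23] = 0x90;                       /* nop, pads the final 32-bit store */
}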
27194 /* The following file contains several enumerations and data structures
27195 built from the definitions in i386-builtin-types.def. */
27197 #include "i386-builtin-types.inc"
27199 /* Table for the ix86 builtin non-function types. */
27200 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27202 /* Retrieve an element from the above table, building some of
27203 the types lazily. */
27205 static tree
27206 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27208 unsigned int index;
27209 tree type, itype;
27211 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27213 type = ix86_builtin_type_tab[(int) tcode];
27214 if (type != NULL)
27215 return type;
27217 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27218 if (tcode <= IX86_BT_LAST_VECT)
27220 enum machine_mode mode;
27222 index = tcode - IX86_BT_LAST_PRIM - 1;
27223 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27224 mode = ix86_builtin_type_vect_mode[index];
27226 type = build_vector_type_for_mode (itype, mode);
27228 else
27230 int quals;
27232 index = tcode - IX86_BT_LAST_VECT - 1;
27233 if (tcode <= IX86_BT_LAST_PTR)
27234 quals = TYPE_UNQUALIFIED;
27235 else
27236 quals = TYPE_QUAL_CONST;
27238 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27239 if (quals != TYPE_UNQUALIFIED)
27240 itype = build_qualified_type (itype, quals);
27242 type = build_pointer_type (itype);
27245 ix86_builtin_type_tab[(int) tcode] = type;
27246 return type;
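/* An illustrative stand-alone sketch of the build-on-demand pattern used
   above: look the code up in the table first, construct the entry lazily on
   a miss, and cache it for subsequent calls.  Everything below is a
   hypothetical stand-in for the real type-building machinery.  */

#include <stddef.h>

#define EXAMPLE_N_CODES 4

static const char *example_type_tab[EXAMPLE_N_CODES];

static const char *
example_build_type (int code)
{
  /* Stand-in for the vector/pointer type construction done above.  */
  return (code & 1) ? "vector type" : "pointer type";
}

static const char *
example_get_type (int code)
{
  const char *type = example_type_tab[code];
  if (type != NULL)
    return type;                        /* already built */
  type = example_build_type (code);     /* build lazily ... */
  example_type_tab[code] = type;        /* ... and cache it */
  return type;
}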
27249 /* Table for the ix86 builtin function types. */
27250 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27252 /* Retrieve an element from the above table, building some of
27253 the types lazily. */
27255 static tree
27256 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27258 tree type;
27260 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27262 type = ix86_builtin_func_type_tab[(int) tcode];
27263 if (type != NULL)
27264 return type;
27266 if (tcode <= IX86_BT_LAST_FUNC)
27268 unsigned start = ix86_builtin_func_start[(int) tcode];
27269 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27270 tree rtype, atype, args = void_list_node;
27271 unsigned i;
27273 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27274 for (i = after - 1; i > start; --i)
27276 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27277 args = tree_cons (NULL, atype, args);
27280 type = build_function_type (rtype, args);
27282 else
27284 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27285 enum ix86_builtin_func_type icode;
27287 icode = ix86_builtin_func_alias_base[index];
27288 type = ix86_get_builtin_func_type (icode);
27291 ix86_builtin_func_type_tab[(int) tcode] = type;
27292 return type;
27296 /* Codes for all the SSE/MMX builtins. */
27297 enum ix86_builtins
27299 IX86_BUILTIN_ADDPS,
27300 IX86_BUILTIN_ADDSS,
27301 IX86_BUILTIN_DIVPS,
27302 IX86_BUILTIN_DIVSS,
27303 IX86_BUILTIN_MULPS,
27304 IX86_BUILTIN_MULSS,
27305 IX86_BUILTIN_SUBPS,
27306 IX86_BUILTIN_SUBSS,
27308 IX86_BUILTIN_CMPEQPS,
27309 IX86_BUILTIN_CMPLTPS,
27310 IX86_BUILTIN_CMPLEPS,
27311 IX86_BUILTIN_CMPGTPS,
27312 IX86_BUILTIN_CMPGEPS,
27313 IX86_BUILTIN_CMPNEQPS,
27314 IX86_BUILTIN_CMPNLTPS,
27315 IX86_BUILTIN_CMPNLEPS,
27316 IX86_BUILTIN_CMPNGTPS,
27317 IX86_BUILTIN_CMPNGEPS,
27318 IX86_BUILTIN_CMPORDPS,
27319 IX86_BUILTIN_CMPUNORDPS,
27320 IX86_BUILTIN_CMPEQSS,
27321 IX86_BUILTIN_CMPLTSS,
27322 IX86_BUILTIN_CMPLESS,
27323 IX86_BUILTIN_CMPNEQSS,
27324 IX86_BUILTIN_CMPNLTSS,
27325 IX86_BUILTIN_CMPNLESS,
27326 IX86_BUILTIN_CMPORDSS,
27327 IX86_BUILTIN_CMPUNORDSS,
27329 IX86_BUILTIN_COMIEQSS,
27330 IX86_BUILTIN_COMILTSS,
27331 IX86_BUILTIN_COMILESS,
27332 IX86_BUILTIN_COMIGTSS,
27333 IX86_BUILTIN_COMIGESS,
27334 IX86_BUILTIN_COMINEQSS,
27335 IX86_BUILTIN_UCOMIEQSS,
27336 IX86_BUILTIN_UCOMILTSS,
27337 IX86_BUILTIN_UCOMILESS,
27338 IX86_BUILTIN_UCOMIGTSS,
27339 IX86_BUILTIN_UCOMIGESS,
27340 IX86_BUILTIN_UCOMINEQSS,
27342 IX86_BUILTIN_CVTPI2PS,
27343 IX86_BUILTIN_CVTPS2PI,
27344 IX86_BUILTIN_CVTSI2SS,
27345 IX86_BUILTIN_CVTSI642SS,
27346 IX86_BUILTIN_CVTSS2SI,
27347 IX86_BUILTIN_CVTSS2SI64,
27348 IX86_BUILTIN_CVTTPS2PI,
27349 IX86_BUILTIN_CVTTSS2SI,
27350 IX86_BUILTIN_CVTTSS2SI64,
27352 IX86_BUILTIN_MAXPS,
27353 IX86_BUILTIN_MAXSS,
27354 IX86_BUILTIN_MINPS,
27355 IX86_BUILTIN_MINSS,
27357 IX86_BUILTIN_LOADUPS,
27358 IX86_BUILTIN_STOREUPS,
27359 IX86_BUILTIN_MOVSS,
27361 IX86_BUILTIN_MOVHLPS,
27362 IX86_BUILTIN_MOVLHPS,
27363 IX86_BUILTIN_LOADHPS,
27364 IX86_BUILTIN_LOADLPS,
27365 IX86_BUILTIN_STOREHPS,
27366 IX86_BUILTIN_STORELPS,
27368 IX86_BUILTIN_MASKMOVQ,
27369 IX86_BUILTIN_MOVMSKPS,
27370 IX86_BUILTIN_PMOVMSKB,
27372 IX86_BUILTIN_MOVNTPS,
27373 IX86_BUILTIN_MOVNTQ,
27375 IX86_BUILTIN_LOADDQU,
27376 IX86_BUILTIN_STOREDQU,
27378 IX86_BUILTIN_PACKSSWB,
27379 IX86_BUILTIN_PACKSSDW,
27380 IX86_BUILTIN_PACKUSWB,
27382 IX86_BUILTIN_PADDB,
27383 IX86_BUILTIN_PADDW,
27384 IX86_BUILTIN_PADDD,
27385 IX86_BUILTIN_PADDQ,
27386 IX86_BUILTIN_PADDSB,
27387 IX86_BUILTIN_PADDSW,
27388 IX86_BUILTIN_PADDUSB,
27389 IX86_BUILTIN_PADDUSW,
27390 IX86_BUILTIN_PSUBB,
27391 IX86_BUILTIN_PSUBW,
27392 IX86_BUILTIN_PSUBD,
27393 IX86_BUILTIN_PSUBQ,
27394 IX86_BUILTIN_PSUBSB,
27395 IX86_BUILTIN_PSUBSW,
27396 IX86_BUILTIN_PSUBUSB,
27397 IX86_BUILTIN_PSUBUSW,
27399 IX86_BUILTIN_PAND,
27400 IX86_BUILTIN_PANDN,
27401 IX86_BUILTIN_POR,
27402 IX86_BUILTIN_PXOR,
27404 IX86_BUILTIN_PAVGB,
27405 IX86_BUILTIN_PAVGW,
27407 IX86_BUILTIN_PCMPEQB,
27408 IX86_BUILTIN_PCMPEQW,
27409 IX86_BUILTIN_PCMPEQD,
27410 IX86_BUILTIN_PCMPGTB,
27411 IX86_BUILTIN_PCMPGTW,
27412 IX86_BUILTIN_PCMPGTD,
27414 IX86_BUILTIN_PMADDWD,
27416 IX86_BUILTIN_PMAXSW,
27417 IX86_BUILTIN_PMAXUB,
27418 IX86_BUILTIN_PMINSW,
27419 IX86_BUILTIN_PMINUB,
27421 IX86_BUILTIN_PMULHUW,
27422 IX86_BUILTIN_PMULHW,
27423 IX86_BUILTIN_PMULLW,
27425 IX86_BUILTIN_PSADBW,
27426 IX86_BUILTIN_PSHUFW,
27428 IX86_BUILTIN_PSLLW,
27429 IX86_BUILTIN_PSLLD,
27430 IX86_BUILTIN_PSLLQ,
27431 IX86_BUILTIN_PSRAW,
27432 IX86_BUILTIN_PSRAD,
27433 IX86_BUILTIN_PSRLW,
27434 IX86_BUILTIN_PSRLD,
27435 IX86_BUILTIN_PSRLQ,
27436 IX86_BUILTIN_PSLLWI,
27437 IX86_BUILTIN_PSLLDI,
27438 IX86_BUILTIN_PSLLQI,
27439 IX86_BUILTIN_PSRAWI,
27440 IX86_BUILTIN_PSRADI,
27441 IX86_BUILTIN_PSRLWI,
27442 IX86_BUILTIN_PSRLDI,
27443 IX86_BUILTIN_PSRLQI,
27445 IX86_BUILTIN_PUNPCKHBW,
27446 IX86_BUILTIN_PUNPCKHWD,
27447 IX86_BUILTIN_PUNPCKHDQ,
27448 IX86_BUILTIN_PUNPCKLBW,
27449 IX86_BUILTIN_PUNPCKLWD,
27450 IX86_BUILTIN_PUNPCKLDQ,
27452 IX86_BUILTIN_SHUFPS,
27454 IX86_BUILTIN_RCPPS,
27455 IX86_BUILTIN_RCPSS,
27456 IX86_BUILTIN_RSQRTPS,
27457 IX86_BUILTIN_RSQRTPS_NR,
27458 IX86_BUILTIN_RSQRTSS,
27459 IX86_BUILTIN_RSQRTF,
27460 IX86_BUILTIN_SQRTPS,
27461 IX86_BUILTIN_SQRTPS_NR,
27462 IX86_BUILTIN_SQRTSS,
27464 IX86_BUILTIN_UNPCKHPS,
27465 IX86_BUILTIN_UNPCKLPS,
27467 IX86_BUILTIN_ANDPS,
27468 IX86_BUILTIN_ANDNPS,
27469 IX86_BUILTIN_ORPS,
27470 IX86_BUILTIN_XORPS,
27472 IX86_BUILTIN_EMMS,
27473 IX86_BUILTIN_LDMXCSR,
27474 IX86_BUILTIN_STMXCSR,
27475 IX86_BUILTIN_SFENCE,
27477 IX86_BUILTIN_FXSAVE,
27478 IX86_BUILTIN_FXRSTOR,
27479 IX86_BUILTIN_FXSAVE64,
27480 IX86_BUILTIN_FXRSTOR64,
27482 IX86_BUILTIN_XSAVE,
27483 IX86_BUILTIN_XRSTOR,
27484 IX86_BUILTIN_XSAVE64,
27485 IX86_BUILTIN_XRSTOR64,
27487 IX86_BUILTIN_XSAVEOPT,
27488 IX86_BUILTIN_XSAVEOPT64,
27490 IX86_BUILTIN_XSAVEC,
27491 IX86_BUILTIN_XSAVEC64,
27493 IX86_BUILTIN_XSAVES,
27494 IX86_BUILTIN_XRSTORS,
27495 IX86_BUILTIN_XSAVES64,
27496 IX86_BUILTIN_XRSTORS64,
27498 /* 3DNow! Original */
27499 IX86_BUILTIN_FEMMS,
27500 IX86_BUILTIN_PAVGUSB,
27501 IX86_BUILTIN_PF2ID,
27502 IX86_BUILTIN_PFACC,
27503 IX86_BUILTIN_PFADD,
27504 IX86_BUILTIN_PFCMPEQ,
27505 IX86_BUILTIN_PFCMPGE,
27506 IX86_BUILTIN_PFCMPGT,
27507 IX86_BUILTIN_PFMAX,
27508 IX86_BUILTIN_PFMIN,
27509 IX86_BUILTIN_PFMUL,
27510 IX86_BUILTIN_PFRCP,
27511 IX86_BUILTIN_PFRCPIT1,
27512 IX86_BUILTIN_PFRCPIT2,
27513 IX86_BUILTIN_PFRSQIT1,
27514 IX86_BUILTIN_PFRSQRT,
27515 IX86_BUILTIN_PFSUB,
27516 IX86_BUILTIN_PFSUBR,
27517 IX86_BUILTIN_PI2FD,
27518 IX86_BUILTIN_PMULHRW,
27520 /* 3DNow! Athlon Extensions */
27521 IX86_BUILTIN_PF2IW,
27522 IX86_BUILTIN_PFNACC,
27523 IX86_BUILTIN_PFPNACC,
27524 IX86_BUILTIN_PI2FW,
27525 IX86_BUILTIN_PSWAPDSI,
27526 IX86_BUILTIN_PSWAPDSF,
27528 /* SSE2 */
27529 IX86_BUILTIN_ADDPD,
27530 IX86_BUILTIN_ADDSD,
27531 IX86_BUILTIN_DIVPD,
27532 IX86_BUILTIN_DIVSD,
27533 IX86_BUILTIN_MULPD,
27534 IX86_BUILTIN_MULSD,
27535 IX86_BUILTIN_SUBPD,
27536 IX86_BUILTIN_SUBSD,
27538 IX86_BUILTIN_CMPEQPD,
27539 IX86_BUILTIN_CMPLTPD,
27540 IX86_BUILTIN_CMPLEPD,
27541 IX86_BUILTIN_CMPGTPD,
27542 IX86_BUILTIN_CMPGEPD,
27543 IX86_BUILTIN_CMPNEQPD,
27544 IX86_BUILTIN_CMPNLTPD,
27545 IX86_BUILTIN_CMPNLEPD,
27546 IX86_BUILTIN_CMPNGTPD,
27547 IX86_BUILTIN_CMPNGEPD,
27548 IX86_BUILTIN_CMPORDPD,
27549 IX86_BUILTIN_CMPUNORDPD,
27550 IX86_BUILTIN_CMPEQSD,
27551 IX86_BUILTIN_CMPLTSD,
27552 IX86_BUILTIN_CMPLESD,
27553 IX86_BUILTIN_CMPNEQSD,
27554 IX86_BUILTIN_CMPNLTSD,
27555 IX86_BUILTIN_CMPNLESD,
27556 IX86_BUILTIN_CMPORDSD,
27557 IX86_BUILTIN_CMPUNORDSD,
27559 IX86_BUILTIN_COMIEQSD,
27560 IX86_BUILTIN_COMILTSD,
27561 IX86_BUILTIN_COMILESD,
27562 IX86_BUILTIN_COMIGTSD,
27563 IX86_BUILTIN_COMIGESD,
27564 IX86_BUILTIN_COMINEQSD,
27565 IX86_BUILTIN_UCOMIEQSD,
27566 IX86_BUILTIN_UCOMILTSD,
27567 IX86_BUILTIN_UCOMILESD,
27568 IX86_BUILTIN_UCOMIGTSD,
27569 IX86_BUILTIN_UCOMIGESD,
27570 IX86_BUILTIN_UCOMINEQSD,
27572 IX86_BUILTIN_MAXPD,
27573 IX86_BUILTIN_MAXSD,
27574 IX86_BUILTIN_MINPD,
27575 IX86_BUILTIN_MINSD,
27577 IX86_BUILTIN_ANDPD,
27578 IX86_BUILTIN_ANDNPD,
27579 IX86_BUILTIN_ORPD,
27580 IX86_BUILTIN_XORPD,
27582 IX86_BUILTIN_SQRTPD,
27583 IX86_BUILTIN_SQRTSD,
27585 IX86_BUILTIN_UNPCKHPD,
27586 IX86_BUILTIN_UNPCKLPD,
27588 IX86_BUILTIN_SHUFPD,
27590 IX86_BUILTIN_LOADUPD,
27591 IX86_BUILTIN_STOREUPD,
27592 IX86_BUILTIN_MOVSD,
27594 IX86_BUILTIN_LOADHPD,
27595 IX86_BUILTIN_LOADLPD,
27597 IX86_BUILTIN_CVTDQ2PD,
27598 IX86_BUILTIN_CVTDQ2PS,
27600 IX86_BUILTIN_CVTPD2DQ,
27601 IX86_BUILTIN_CVTPD2PI,
27602 IX86_BUILTIN_CVTPD2PS,
27603 IX86_BUILTIN_CVTTPD2DQ,
27604 IX86_BUILTIN_CVTTPD2PI,
27606 IX86_BUILTIN_CVTPI2PD,
27607 IX86_BUILTIN_CVTSI2SD,
27608 IX86_BUILTIN_CVTSI642SD,
27610 IX86_BUILTIN_CVTSD2SI,
27611 IX86_BUILTIN_CVTSD2SI64,
27612 IX86_BUILTIN_CVTSD2SS,
27613 IX86_BUILTIN_CVTSS2SD,
27614 IX86_BUILTIN_CVTTSD2SI,
27615 IX86_BUILTIN_CVTTSD2SI64,
27617 IX86_BUILTIN_CVTPS2DQ,
27618 IX86_BUILTIN_CVTPS2PD,
27619 IX86_BUILTIN_CVTTPS2DQ,
27621 IX86_BUILTIN_MOVNTI,
27622 IX86_BUILTIN_MOVNTI64,
27623 IX86_BUILTIN_MOVNTPD,
27624 IX86_BUILTIN_MOVNTDQ,
27626 IX86_BUILTIN_MOVQ128,
27628 /* SSE2 MMX */
27629 IX86_BUILTIN_MASKMOVDQU,
27630 IX86_BUILTIN_MOVMSKPD,
27631 IX86_BUILTIN_PMOVMSKB128,
27633 IX86_BUILTIN_PACKSSWB128,
27634 IX86_BUILTIN_PACKSSDW128,
27635 IX86_BUILTIN_PACKUSWB128,
27637 IX86_BUILTIN_PADDB128,
27638 IX86_BUILTIN_PADDW128,
27639 IX86_BUILTIN_PADDD128,
27640 IX86_BUILTIN_PADDQ128,
27641 IX86_BUILTIN_PADDSB128,
27642 IX86_BUILTIN_PADDSW128,
27643 IX86_BUILTIN_PADDUSB128,
27644 IX86_BUILTIN_PADDUSW128,
27645 IX86_BUILTIN_PSUBB128,
27646 IX86_BUILTIN_PSUBW128,
27647 IX86_BUILTIN_PSUBD128,
27648 IX86_BUILTIN_PSUBQ128,
27649 IX86_BUILTIN_PSUBSB128,
27650 IX86_BUILTIN_PSUBSW128,
27651 IX86_BUILTIN_PSUBUSB128,
27652 IX86_BUILTIN_PSUBUSW128,
27654 IX86_BUILTIN_PAND128,
27655 IX86_BUILTIN_PANDN128,
27656 IX86_BUILTIN_POR128,
27657 IX86_BUILTIN_PXOR128,
27659 IX86_BUILTIN_PAVGB128,
27660 IX86_BUILTIN_PAVGW128,
27662 IX86_BUILTIN_PCMPEQB128,
27663 IX86_BUILTIN_PCMPEQW128,
27664 IX86_BUILTIN_PCMPEQD128,
27665 IX86_BUILTIN_PCMPGTB128,
27666 IX86_BUILTIN_PCMPGTW128,
27667 IX86_BUILTIN_PCMPGTD128,
27669 IX86_BUILTIN_PMADDWD128,
27671 IX86_BUILTIN_PMAXSW128,
27672 IX86_BUILTIN_PMAXUB128,
27673 IX86_BUILTIN_PMINSW128,
27674 IX86_BUILTIN_PMINUB128,
27676 IX86_BUILTIN_PMULUDQ,
27677 IX86_BUILTIN_PMULUDQ128,
27678 IX86_BUILTIN_PMULHUW128,
27679 IX86_BUILTIN_PMULHW128,
27680 IX86_BUILTIN_PMULLW128,
27682 IX86_BUILTIN_PSADBW128,
27683 IX86_BUILTIN_PSHUFHW,
27684 IX86_BUILTIN_PSHUFLW,
27685 IX86_BUILTIN_PSHUFD,
27687 IX86_BUILTIN_PSLLDQI128,
27688 IX86_BUILTIN_PSLLWI128,
27689 IX86_BUILTIN_PSLLDI128,
27690 IX86_BUILTIN_PSLLQI128,
27691 IX86_BUILTIN_PSRAWI128,
27692 IX86_BUILTIN_PSRADI128,
27693 IX86_BUILTIN_PSRLDQI128,
27694 IX86_BUILTIN_PSRLWI128,
27695 IX86_BUILTIN_PSRLDI128,
27696 IX86_BUILTIN_PSRLQI128,
27698 IX86_BUILTIN_PSLLDQ128,
27699 IX86_BUILTIN_PSLLW128,
27700 IX86_BUILTIN_PSLLD128,
27701 IX86_BUILTIN_PSLLQ128,
27702 IX86_BUILTIN_PSRAW128,
27703 IX86_BUILTIN_PSRAD128,
27704 IX86_BUILTIN_PSRLW128,
27705 IX86_BUILTIN_PSRLD128,
27706 IX86_BUILTIN_PSRLQ128,
27708 IX86_BUILTIN_PUNPCKHBW128,
27709 IX86_BUILTIN_PUNPCKHWD128,
27710 IX86_BUILTIN_PUNPCKHDQ128,
27711 IX86_BUILTIN_PUNPCKHQDQ128,
27712 IX86_BUILTIN_PUNPCKLBW128,
27713 IX86_BUILTIN_PUNPCKLWD128,
27714 IX86_BUILTIN_PUNPCKLDQ128,
27715 IX86_BUILTIN_PUNPCKLQDQ128,
27717 IX86_BUILTIN_CLFLUSH,
27718 IX86_BUILTIN_MFENCE,
27719 IX86_BUILTIN_LFENCE,
27720 IX86_BUILTIN_PAUSE,
27722 IX86_BUILTIN_FNSTENV,
27723 IX86_BUILTIN_FLDENV,
27724 IX86_BUILTIN_FNSTSW,
27725 IX86_BUILTIN_FNCLEX,
27727 IX86_BUILTIN_BSRSI,
27728 IX86_BUILTIN_BSRDI,
27729 IX86_BUILTIN_RDPMC,
27730 IX86_BUILTIN_RDTSC,
27731 IX86_BUILTIN_RDTSCP,
27732 IX86_BUILTIN_ROLQI,
27733 IX86_BUILTIN_ROLHI,
27734 IX86_BUILTIN_RORQI,
27735 IX86_BUILTIN_RORHI,
27737 /* SSE3. */
27738 IX86_BUILTIN_ADDSUBPS,
27739 IX86_BUILTIN_HADDPS,
27740 IX86_BUILTIN_HSUBPS,
27741 IX86_BUILTIN_MOVSHDUP,
27742 IX86_BUILTIN_MOVSLDUP,
27743 IX86_BUILTIN_ADDSUBPD,
27744 IX86_BUILTIN_HADDPD,
27745 IX86_BUILTIN_HSUBPD,
27746 IX86_BUILTIN_LDDQU,
27748 IX86_BUILTIN_MONITOR,
27749 IX86_BUILTIN_MWAIT,
27751 /* SSSE3. */
27752 IX86_BUILTIN_PHADDW,
27753 IX86_BUILTIN_PHADDD,
27754 IX86_BUILTIN_PHADDSW,
27755 IX86_BUILTIN_PHSUBW,
27756 IX86_BUILTIN_PHSUBD,
27757 IX86_BUILTIN_PHSUBSW,
27758 IX86_BUILTIN_PMADDUBSW,
27759 IX86_BUILTIN_PMULHRSW,
27760 IX86_BUILTIN_PSHUFB,
27761 IX86_BUILTIN_PSIGNB,
27762 IX86_BUILTIN_PSIGNW,
27763 IX86_BUILTIN_PSIGND,
27764 IX86_BUILTIN_PALIGNR,
27765 IX86_BUILTIN_PABSB,
27766 IX86_BUILTIN_PABSW,
27767 IX86_BUILTIN_PABSD,
27769 IX86_BUILTIN_PHADDW128,
27770 IX86_BUILTIN_PHADDD128,
27771 IX86_BUILTIN_PHADDSW128,
27772 IX86_BUILTIN_PHSUBW128,
27773 IX86_BUILTIN_PHSUBD128,
27774 IX86_BUILTIN_PHSUBSW128,
27775 IX86_BUILTIN_PMADDUBSW128,
27776 IX86_BUILTIN_PMULHRSW128,
27777 IX86_BUILTIN_PSHUFB128,
27778 IX86_BUILTIN_PSIGNB128,
27779 IX86_BUILTIN_PSIGNW128,
27780 IX86_BUILTIN_PSIGND128,
27781 IX86_BUILTIN_PALIGNR128,
27782 IX86_BUILTIN_PABSB128,
27783 IX86_BUILTIN_PABSW128,
27784 IX86_BUILTIN_PABSD128,
27786 /* AMDFAM10 - SSE4A New Instructions. */
27787 IX86_BUILTIN_MOVNTSD,
27788 IX86_BUILTIN_MOVNTSS,
27789 IX86_BUILTIN_EXTRQI,
27790 IX86_BUILTIN_EXTRQ,
27791 IX86_BUILTIN_INSERTQI,
27792 IX86_BUILTIN_INSERTQ,
27794 /* SSE4.1. */
27795 IX86_BUILTIN_BLENDPD,
27796 IX86_BUILTIN_BLENDPS,
27797 IX86_BUILTIN_BLENDVPD,
27798 IX86_BUILTIN_BLENDVPS,
27799 IX86_BUILTIN_PBLENDVB128,
27800 IX86_BUILTIN_PBLENDW128,
27802 IX86_BUILTIN_DPPD,
27803 IX86_BUILTIN_DPPS,
27805 IX86_BUILTIN_INSERTPS128,
27807 IX86_BUILTIN_MOVNTDQA,
27808 IX86_BUILTIN_MPSADBW128,
27809 IX86_BUILTIN_PACKUSDW128,
27810 IX86_BUILTIN_PCMPEQQ,
27811 IX86_BUILTIN_PHMINPOSUW128,
27813 IX86_BUILTIN_PMAXSB128,
27814 IX86_BUILTIN_PMAXSD128,
27815 IX86_BUILTIN_PMAXUD128,
27816 IX86_BUILTIN_PMAXUW128,
27818 IX86_BUILTIN_PMINSB128,
27819 IX86_BUILTIN_PMINSD128,
27820 IX86_BUILTIN_PMINUD128,
27821 IX86_BUILTIN_PMINUW128,
27823 IX86_BUILTIN_PMOVSXBW128,
27824 IX86_BUILTIN_PMOVSXBD128,
27825 IX86_BUILTIN_PMOVSXBQ128,
27826 IX86_BUILTIN_PMOVSXWD128,
27827 IX86_BUILTIN_PMOVSXWQ128,
27828 IX86_BUILTIN_PMOVSXDQ128,
27830 IX86_BUILTIN_PMOVZXBW128,
27831 IX86_BUILTIN_PMOVZXBD128,
27832 IX86_BUILTIN_PMOVZXBQ128,
27833 IX86_BUILTIN_PMOVZXWD128,
27834 IX86_BUILTIN_PMOVZXWQ128,
27835 IX86_BUILTIN_PMOVZXDQ128,
27837 IX86_BUILTIN_PMULDQ128,
27838 IX86_BUILTIN_PMULLD128,
27840 IX86_BUILTIN_ROUNDSD,
27841 IX86_BUILTIN_ROUNDSS,
27843 IX86_BUILTIN_ROUNDPD,
27844 IX86_BUILTIN_ROUNDPS,
27846 IX86_BUILTIN_FLOORPD,
27847 IX86_BUILTIN_CEILPD,
27848 IX86_BUILTIN_TRUNCPD,
27849 IX86_BUILTIN_RINTPD,
27850 IX86_BUILTIN_ROUNDPD_AZ,
27852 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27853 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27854 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27856 IX86_BUILTIN_FLOORPS,
27857 IX86_BUILTIN_CEILPS,
27858 IX86_BUILTIN_TRUNCPS,
27859 IX86_BUILTIN_RINTPS,
27860 IX86_BUILTIN_ROUNDPS_AZ,
27862 IX86_BUILTIN_FLOORPS_SFIX,
27863 IX86_BUILTIN_CEILPS_SFIX,
27864 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27866 IX86_BUILTIN_PTESTZ,
27867 IX86_BUILTIN_PTESTC,
27868 IX86_BUILTIN_PTESTNZC,
27870 IX86_BUILTIN_VEC_INIT_V2SI,
27871 IX86_BUILTIN_VEC_INIT_V4HI,
27872 IX86_BUILTIN_VEC_INIT_V8QI,
27873 IX86_BUILTIN_VEC_EXT_V2DF,
27874 IX86_BUILTIN_VEC_EXT_V2DI,
27875 IX86_BUILTIN_VEC_EXT_V4SF,
27876 IX86_BUILTIN_VEC_EXT_V4SI,
27877 IX86_BUILTIN_VEC_EXT_V8HI,
27878 IX86_BUILTIN_VEC_EXT_V2SI,
27879 IX86_BUILTIN_VEC_EXT_V4HI,
27880 IX86_BUILTIN_VEC_EXT_V16QI,
27881 IX86_BUILTIN_VEC_SET_V2DI,
27882 IX86_BUILTIN_VEC_SET_V4SF,
27883 IX86_BUILTIN_VEC_SET_V4SI,
27884 IX86_BUILTIN_VEC_SET_V8HI,
27885 IX86_BUILTIN_VEC_SET_V4HI,
27886 IX86_BUILTIN_VEC_SET_V16QI,
27888 IX86_BUILTIN_VEC_PACK_SFIX,
27889 IX86_BUILTIN_VEC_PACK_SFIX256,
27891 /* SSE4.2. */
27892 IX86_BUILTIN_CRC32QI,
27893 IX86_BUILTIN_CRC32HI,
27894 IX86_BUILTIN_CRC32SI,
27895 IX86_BUILTIN_CRC32DI,
27897 IX86_BUILTIN_PCMPESTRI128,
27898 IX86_BUILTIN_PCMPESTRM128,
27899 IX86_BUILTIN_PCMPESTRA128,
27900 IX86_BUILTIN_PCMPESTRC128,
27901 IX86_BUILTIN_PCMPESTRO128,
27902 IX86_BUILTIN_PCMPESTRS128,
27903 IX86_BUILTIN_PCMPESTRZ128,
27904 IX86_BUILTIN_PCMPISTRI128,
27905 IX86_BUILTIN_PCMPISTRM128,
27906 IX86_BUILTIN_PCMPISTRA128,
27907 IX86_BUILTIN_PCMPISTRC128,
27908 IX86_BUILTIN_PCMPISTRO128,
27909 IX86_BUILTIN_PCMPISTRS128,
27910 IX86_BUILTIN_PCMPISTRZ128,
27912 IX86_BUILTIN_PCMPGTQ,
27914 /* AES instructions */
27915 IX86_BUILTIN_AESENC128,
27916 IX86_BUILTIN_AESENCLAST128,
27917 IX86_BUILTIN_AESDEC128,
27918 IX86_BUILTIN_AESDECLAST128,
27919 IX86_BUILTIN_AESIMC128,
27920 IX86_BUILTIN_AESKEYGENASSIST128,
27922 /* PCLMUL instruction */
27923 IX86_BUILTIN_PCLMULQDQ128,
27925 /* AVX */
27926 IX86_BUILTIN_ADDPD256,
27927 IX86_BUILTIN_ADDPS256,
27928 IX86_BUILTIN_ADDSUBPD256,
27929 IX86_BUILTIN_ADDSUBPS256,
27930 IX86_BUILTIN_ANDPD256,
27931 IX86_BUILTIN_ANDPS256,
27932 IX86_BUILTIN_ANDNPD256,
27933 IX86_BUILTIN_ANDNPS256,
27934 IX86_BUILTIN_BLENDPD256,
27935 IX86_BUILTIN_BLENDPS256,
27936 IX86_BUILTIN_BLENDVPD256,
27937 IX86_BUILTIN_BLENDVPS256,
27938 IX86_BUILTIN_DIVPD256,
27939 IX86_BUILTIN_DIVPS256,
27940 IX86_BUILTIN_DPPS256,
27941 IX86_BUILTIN_HADDPD256,
27942 IX86_BUILTIN_HADDPS256,
27943 IX86_BUILTIN_HSUBPD256,
27944 IX86_BUILTIN_HSUBPS256,
27945 IX86_BUILTIN_MAXPD256,
27946 IX86_BUILTIN_MAXPS256,
27947 IX86_BUILTIN_MINPD256,
27948 IX86_BUILTIN_MINPS256,
27949 IX86_BUILTIN_MULPD256,
27950 IX86_BUILTIN_MULPS256,
27951 IX86_BUILTIN_ORPD256,
27952 IX86_BUILTIN_ORPS256,
27953 IX86_BUILTIN_SHUFPD256,
27954 IX86_BUILTIN_SHUFPS256,
27955 IX86_BUILTIN_SUBPD256,
27956 IX86_BUILTIN_SUBPS256,
27957 IX86_BUILTIN_XORPD256,
27958 IX86_BUILTIN_XORPS256,
27959 IX86_BUILTIN_CMPSD,
27960 IX86_BUILTIN_CMPSS,
27961 IX86_BUILTIN_CMPPD,
27962 IX86_BUILTIN_CMPPS,
27963 IX86_BUILTIN_CMPPD256,
27964 IX86_BUILTIN_CMPPS256,
27965 IX86_BUILTIN_CVTDQ2PD256,
27966 IX86_BUILTIN_CVTDQ2PS256,
27967 IX86_BUILTIN_CVTPD2PS256,
27968 IX86_BUILTIN_CVTPS2DQ256,
27969 IX86_BUILTIN_CVTPS2PD256,
27970 IX86_BUILTIN_CVTTPD2DQ256,
27971 IX86_BUILTIN_CVTPD2DQ256,
27972 IX86_BUILTIN_CVTTPS2DQ256,
27973 IX86_BUILTIN_EXTRACTF128PD256,
27974 IX86_BUILTIN_EXTRACTF128PS256,
27975 IX86_BUILTIN_EXTRACTF128SI256,
27976 IX86_BUILTIN_VZEROALL,
27977 IX86_BUILTIN_VZEROUPPER,
27978 IX86_BUILTIN_VPERMILVARPD,
27979 IX86_BUILTIN_VPERMILVARPS,
27980 IX86_BUILTIN_VPERMILVARPD256,
27981 IX86_BUILTIN_VPERMILVARPS256,
27982 IX86_BUILTIN_VPERMILPD,
27983 IX86_BUILTIN_VPERMILPS,
27984 IX86_BUILTIN_VPERMILPD256,
27985 IX86_BUILTIN_VPERMILPS256,
27986 IX86_BUILTIN_VPERMIL2PD,
27987 IX86_BUILTIN_VPERMIL2PS,
27988 IX86_BUILTIN_VPERMIL2PD256,
27989 IX86_BUILTIN_VPERMIL2PS256,
27990 IX86_BUILTIN_VPERM2F128PD256,
27991 IX86_BUILTIN_VPERM2F128PS256,
27992 IX86_BUILTIN_VPERM2F128SI256,
27993 IX86_BUILTIN_VBROADCASTSS,
27994 IX86_BUILTIN_VBROADCASTSD256,
27995 IX86_BUILTIN_VBROADCASTSS256,
27996 IX86_BUILTIN_VBROADCASTPD256,
27997 IX86_BUILTIN_VBROADCASTPS256,
27998 IX86_BUILTIN_VINSERTF128PD256,
27999 IX86_BUILTIN_VINSERTF128PS256,
28000 IX86_BUILTIN_VINSERTF128SI256,
28001 IX86_BUILTIN_LOADUPD256,
28002 IX86_BUILTIN_LOADUPS256,
28003 IX86_BUILTIN_STOREUPD256,
28004 IX86_BUILTIN_STOREUPS256,
28005 IX86_BUILTIN_LDDQU256,
28006 IX86_BUILTIN_MOVNTDQ256,
28007 IX86_BUILTIN_MOVNTPD256,
28008 IX86_BUILTIN_MOVNTPS256,
28009 IX86_BUILTIN_LOADDQU256,
28010 IX86_BUILTIN_STOREDQU256,
28011 IX86_BUILTIN_MASKLOADPD,
28012 IX86_BUILTIN_MASKLOADPS,
28013 IX86_BUILTIN_MASKSTOREPD,
28014 IX86_BUILTIN_MASKSTOREPS,
28015 IX86_BUILTIN_MASKLOADPD256,
28016 IX86_BUILTIN_MASKLOADPS256,
28017 IX86_BUILTIN_MASKSTOREPD256,
28018 IX86_BUILTIN_MASKSTOREPS256,
28019 IX86_BUILTIN_MOVSHDUP256,
28020 IX86_BUILTIN_MOVSLDUP256,
28021 IX86_BUILTIN_MOVDDUP256,
28023 IX86_BUILTIN_SQRTPD256,
28024 IX86_BUILTIN_SQRTPS256,
28025 IX86_BUILTIN_SQRTPS_NR256,
28026 IX86_BUILTIN_RSQRTPS256,
28027 IX86_BUILTIN_RSQRTPS_NR256,
28029 IX86_BUILTIN_RCPPS256,
28031 IX86_BUILTIN_ROUNDPD256,
28032 IX86_BUILTIN_ROUNDPS256,
28034 IX86_BUILTIN_FLOORPD256,
28035 IX86_BUILTIN_CEILPD256,
28036 IX86_BUILTIN_TRUNCPD256,
28037 IX86_BUILTIN_RINTPD256,
28038 IX86_BUILTIN_ROUNDPD_AZ256,
28040 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28041 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28042 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28044 IX86_BUILTIN_FLOORPS256,
28045 IX86_BUILTIN_CEILPS256,
28046 IX86_BUILTIN_TRUNCPS256,
28047 IX86_BUILTIN_RINTPS256,
28048 IX86_BUILTIN_ROUNDPS_AZ256,
28050 IX86_BUILTIN_FLOORPS_SFIX256,
28051 IX86_BUILTIN_CEILPS_SFIX256,
28052 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28054 IX86_BUILTIN_UNPCKHPD256,
28055 IX86_BUILTIN_UNPCKLPD256,
28056 IX86_BUILTIN_UNPCKHPS256,
28057 IX86_BUILTIN_UNPCKLPS256,
28059 IX86_BUILTIN_SI256_SI,
28060 IX86_BUILTIN_PS256_PS,
28061 IX86_BUILTIN_PD256_PD,
28062 IX86_BUILTIN_SI_SI256,
28063 IX86_BUILTIN_PS_PS256,
28064 IX86_BUILTIN_PD_PD256,
28066 IX86_BUILTIN_VTESTZPD,
28067 IX86_BUILTIN_VTESTCPD,
28068 IX86_BUILTIN_VTESTNZCPD,
28069 IX86_BUILTIN_VTESTZPS,
28070 IX86_BUILTIN_VTESTCPS,
28071 IX86_BUILTIN_VTESTNZCPS,
28072 IX86_BUILTIN_VTESTZPD256,
28073 IX86_BUILTIN_VTESTCPD256,
28074 IX86_BUILTIN_VTESTNZCPD256,
28075 IX86_BUILTIN_VTESTZPS256,
28076 IX86_BUILTIN_VTESTCPS256,
28077 IX86_BUILTIN_VTESTNZCPS256,
28078 IX86_BUILTIN_PTESTZ256,
28079 IX86_BUILTIN_PTESTC256,
28080 IX86_BUILTIN_PTESTNZC256,
28082 IX86_BUILTIN_MOVMSKPD256,
28083 IX86_BUILTIN_MOVMSKPS256,
28085 /* AVX2 */
28086 IX86_BUILTIN_MPSADBW256,
28087 IX86_BUILTIN_PABSB256,
28088 IX86_BUILTIN_PABSW256,
28089 IX86_BUILTIN_PABSD256,
28090 IX86_BUILTIN_PACKSSDW256,
28091 IX86_BUILTIN_PACKSSWB256,
28092 IX86_BUILTIN_PACKUSDW256,
28093 IX86_BUILTIN_PACKUSWB256,
28094 IX86_BUILTIN_PADDB256,
28095 IX86_BUILTIN_PADDW256,
28096 IX86_BUILTIN_PADDD256,
28097 IX86_BUILTIN_PADDQ256,
28098 IX86_BUILTIN_PADDSB256,
28099 IX86_BUILTIN_PADDSW256,
28100 IX86_BUILTIN_PADDUSB256,
28101 IX86_BUILTIN_PADDUSW256,
28102 IX86_BUILTIN_PALIGNR256,
28103 IX86_BUILTIN_AND256I,
28104 IX86_BUILTIN_ANDNOT256I,
28105 IX86_BUILTIN_PAVGB256,
28106 IX86_BUILTIN_PAVGW256,
28107 IX86_BUILTIN_PBLENDVB256,
28108 IX86_BUILTIN_PBLENDVW256,
28109 IX86_BUILTIN_PCMPEQB256,
28110 IX86_BUILTIN_PCMPEQW256,
28111 IX86_BUILTIN_PCMPEQD256,
28112 IX86_BUILTIN_PCMPEQQ256,
28113 IX86_BUILTIN_PCMPGTB256,
28114 IX86_BUILTIN_PCMPGTW256,
28115 IX86_BUILTIN_PCMPGTD256,
28116 IX86_BUILTIN_PCMPGTQ256,
28117 IX86_BUILTIN_PHADDW256,
28118 IX86_BUILTIN_PHADDD256,
28119 IX86_BUILTIN_PHADDSW256,
28120 IX86_BUILTIN_PHSUBW256,
28121 IX86_BUILTIN_PHSUBD256,
28122 IX86_BUILTIN_PHSUBSW256,
28123 IX86_BUILTIN_PMADDUBSW256,
28124 IX86_BUILTIN_PMADDWD256,
28125 IX86_BUILTIN_PMAXSB256,
28126 IX86_BUILTIN_PMAXSW256,
28127 IX86_BUILTIN_PMAXSD256,
28128 IX86_BUILTIN_PMAXUB256,
28129 IX86_BUILTIN_PMAXUW256,
28130 IX86_BUILTIN_PMAXUD256,
28131 IX86_BUILTIN_PMINSB256,
28132 IX86_BUILTIN_PMINSW256,
28133 IX86_BUILTIN_PMINSD256,
28134 IX86_BUILTIN_PMINUB256,
28135 IX86_BUILTIN_PMINUW256,
28136 IX86_BUILTIN_PMINUD256,
28137 IX86_BUILTIN_PMOVMSKB256,
28138 IX86_BUILTIN_PMOVSXBW256,
28139 IX86_BUILTIN_PMOVSXBD256,
28140 IX86_BUILTIN_PMOVSXBQ256,
28141 IX86_BUILTIN_PMOVSXWD256,
28142 IX86_BUILTIN_PMOVSXWQ256,
28143 IX86_BUILTIN_PMOVSXDQ256,
28144 IX86_BUILTIN_PMOVZXBW256,
28145 IX86_BUILTIN_PMOVZXBD256,
28146 IX86_BUILTIN_PMOVZXBQ256,
28147 IX86_BUILTIN_PMOVZXWD256,
28148 IX86_BUILTIN_PMOVZXWQ256,
28149 IX86_BUILTIN_PMOVZXDQ256,
28150 IX86_BUILTIN_PMULDQ256,
28151 IX86_BUILTIN_PMULHRSW256,
28152 IX86_BUILTIN_PMULHUW256,
28153 IX86_BUILTIN_PMULHW256,
28154 IX86_BUILTIN_PMULLW256,
28155 IX86_BUILTIN_PMULLD256,
28156 IX86_BUILTIN_PMULUDQ256,
28157 IX86_BUILTIN_POR256,
28158 IX86_BUILTIN_PSADBW256,
28159 IX86_BUILTIN_PSHUFB256,
28160 IX86_BUILTIN_PSHUFD256,
28161 IX86_BUILTIN_PSHUFHW256,
28162 IX86_BUILTIN_PSHUFLW256,
28163 IX86_BUILTIN_PSIGNB256,
28164 IX86_BUILTIN_PSIGNW256,
28165 IX86_BUILTIN_PSIGND256,
28166 IX86_BUILTIN_PSLLDQI256,
28167 IX86_BUILTIN_PSLLWI256,
28168 IX86_BUILTIN_PSLLW256,
28169 IX86_BUILTIN_PSLLDI256,
28170 IX86_BUILTIN_PSLLD256,
28171 IX86_BUILTIN_PSLLQI256,
28172 IX86_BUILTIN_PSLLQ256,
28173 IX86_BUILTIN_PSRAWI256,
28174 IX86_BUILTIN_PSRAW256,
28175 IX86_BUILTIN_PSRADI256,
28176 IX86_BUILTIN_PSRAD256,
28177 IX86_BUILTIN_PSRLDQI256,
28178 IX86_BUILTIN_PSRLWI256,
28179 IX86_BUILTIN_PSRLW256,
28180 IX86_BUILTIN_PSRLDI256,
28181 IX86_BUILTIN_PSRLD256,
28182 IX86_BUILTIN_PSRLQI256,
28183 IX86_BUILTIN_PSRLQ256,
28184 IX86_BUILTIN_PSUBB256,
28185 IX86_BUILTIN_PSUBW256,
28186 IX86_BUILTIN_PSUBD256,
28187 IX86_BUILTIN_PSUBQ256,
28188 IX86_BUILTIN_PSUBSB256,
28189 IX86_BUILTIN_PSUBSW256,
28190 IX86_BUILTIN_PSUBUSB256,
28191 IX86_BUILTIN_PSUBUSW256,
28192 IX86_BUILTIN_PUNPCKHBW256,
28193 IX86_BUILTIN_PUNPCKHWD256,
28194 IX86_BUILTIN_PUNPCKHDQ256,
28195 IX86_BUILTIN_PUNPCKHQDQ256,
28196 IX86_BUILTIN_PUNPCKLBW256,
28197 IX86_BUILTIN_PUNPCKLWD256,
28198 IX86_BUILTIN_PUNPCKLDQ256,
28199 IX86_BUILTIN_PUNPCKLQDQ256,
28200 IX86_BUILTIN_PXOR256,
28201 IX86_BUILTIN_MOVNTDQA256,
28202 IX86_BUILTIN_VBROADCASTSS_PS,
28203 IX86_BUILTIN_VBROADCASTSS_PS256,
28204 IX86_BUILTIN_VBROADCASTSD_PD256,
28205 IX86_BUILTIN_VBROADCASTSI256,
28206 IX86_BUILTIN_PBLENDD256,
28207 IX86_BUILTIN_PBLENDD128,
28208 IX86_BUILTIN_PBROADCASTB256,
28209 IX86_BUILTIN_PBROADCASTW256,
28210 IX86_BUILTIN_PBROADCASTD256,
28211 IX86_BUILTIN_PBROADCASTQ256,
28212 IX86_BUILTIN_PBROADCASTB128,
28213 IX86_BUILTIN_PBROADCASTW128,
28214 IX86_BUILTIN_PBROADCASTD128,
28215 IX86_BUILTIN_PBROADCASTQ128,
28216 IX86_BUILTIN_VPERMVARSI256,
28217 IX86_BUILTIN_VPERMDF256,
28218 IX86_BUILTIN_VPERMVARSF256,
28219 IX86_BUILTIN_VPERMDI256,
28220 IX86_BUILTIN_VPERMTI256,
28221 IX86_BUILTIN_VEXTRACT128I256,
28222 IX86_BUILTIN_VINSERT128I256,
28223 IX86_BUILTIN_MASKLOADD,
28224 IX86_BUILTIN_MASKLOADQ,
28225 IX86_BUILTIN_MASKLOADD256,
28226 IX86_BUILTIN_MASKLOADQ256,
28227 IX86_BUILTIN_MASKSTORED,
28228 IX86_BUILTIN_MASKSTOREQ,
28229 IX86_BUILTIN_MASKSTORED256,
28230 IX86_BUILTIN_MASKSTOREQ256,
28231 IX86_BUILTIN_PSLLVV4DI,
28232 IX86_BUILTIN_PSLLVV2DI,
28233 IX86_BUILTIN_PSLLVV8SI,
28234 IX86_BUILTIN_PSLLVV4SI,
28235 IX86_BUILTIN_PSRAVV8SI,
28236 IX86_BUILTIN_PSRAVV4SI,
28237 IX86_BUILTIN_PSRLVV4DI,
28238 IX86_BUILTIN_PSRLVV2DI,
28239 IX86_BUILTIN_PSRLVV8SI,
28240 IX86_BUILTIN_PSRLVV4SI,
28242 IX86_BUILTIN_GATHERSIV2DF,
28243 IX86_BUILTIN_GATHERSIV4DF,
28244 IX86_BUILTIN_GATHERDIV2DF,
28245 IX86_BUILTIN_GATHERDIV4DF,
28246 IX86_BUILTIN_GATHERSIV4SF,
28247 IX86_BUILTIN_GATHERSIV8SF,
28248 IX86_BUILTIN_GATHERDIV4SF,
28249 IX86_BUILTIN_GATHERDIV8SF,
28250 IX86_BUILTIN_GATHERSIV2DI,
28251 IX86_BUILTIN_GATHERSIV4DI,
28252 IX86_BUILTIN_GATHERDIV2DI,
28253 IX86_BUILTIN_GATHERDIV4DI,
28254 IX86_BUILTIN_GATHERSIV4SI,
28255 IX86_BUILTIN_GATHERSIV8SI,
28256 IX86_BUILTIN_GATHERDIV4SI,
28257 IX86_BUILTIN_GATHERDIV8SI,
28259 /* AVX512F */
28260 IX86_BUILTIN_ADDPD512,
28261 IX86_BUILTIN_ADDPS512,
28262 IX86_BUILTIN_ADDSD_ROUND,
28263 IX86_BUILTIN_ADDSS_ROUND,
28264 IX86_BUILTIN_ALIGND512,
28265 IX86_BUILTIN_ALIGNQ512,
28266 IX86_BUILTIN_BLENDMD512,
28267 IX86_BUILTIN_BLENDMPD512,
28268 IX86_BUILTIN_BLENDMPS512,
28269 IX86_BUILTIN_BLENDMQ512,
28270 IX86_BUILTIN_BROADCASTF32X4_512,
28271 IX86_BUILTIN_BROADCASTF64X4_512,
28272 IX86_BUILTIN_BROADCASTI32X4_512,
28273 IX86_BUILTIN_BROADCASTI64X4_512,
28274 IX86_BUILTIN_BROADCASTSD512,
28275 IX86_BUILTIN_BROADCASTSS512,
28276 IX86_BUILTIN_CMPD512,
28277 IX86_BUILTIN_CMPPD512,
28278 IX86_BUILTIN_CMPPS512,
28279 IX86_BUILTIN_CMPQ512,
28280 IX86_BUILTIN_CMPSD_MASK,
28281 IX86_BUILTIN_CMPSS_MASK,
28282 IX86_BUILTIN_COMIDF,
28283 IX86_BUILTIN_COMISF,
28284 IX86_BUILTIN_COMPRESSPD512,
28285 IX86_BUILTIN_COMPRESSPDSTORE512,
28286 IX86_BUILTIN_COMPRESSPS512,
28287 IX86_BUILTIN_COMPRESSPSSTORE512,
28288 IX86_BUILTIN_CVTDQ2PD512,
28289 IX86_BUILTIN_CVTDQ2PS512,
28290 IX86_BUILTIN_CVTPD2DQ512,
28291 IX86_BUILTIN_CVTPD2PS512,
28292 IX86_BUILTIN_CVTPD2UDQ512,
28293 IX86_BUILTIN_CVTPH2PS512,
28294 IX86_BUILTIN_CVTPS2DQ512,
28295 IX86_BUILTIN_CVTPS2PD512,
28296 IX86_BUILTIN_CVTPS2PH512,
28297 IX86_BUILTIN_CVTPS2UDQ512,
28298 IX86_BUILTIN_CVTSD2SS_ROUND,
28299 IX86_BUILTIN_CVTSI2SD64,
28300 IX86_BUILTIN_CVTSI2SS32,
28301 IX86_BUILTIN_CVTSI2SS64,
28302 IX86_BUILTIN_CVTSS2SD_ROUND,
28303 IX86_BUILTIN_CVTTPD2DQ512,
28304 IX86_BUILTIN_CVTTPD2UDQ512,
28305 IX86_BUILTIN_CVTTPS2DQ512,
28306 IX86_BUILTIN_CVTTPS2UDQ512,
28307 IX86_BUILTIN_CVTUDQ2PD512,
28308 IX86_BUILTIN_CVTUDQ2PS512,
28309 IX86_BUILTIN_CVTUSI2SD32,
28310 IX86_BUILTIN_CVTUSI2SD64,
28311 IX86_BUILTIN_CVTUSI2SS32,
28312 IX86_BUILTIN_CVTUSI2SS64,
28313 IX86_BUILTIN_DIVPD512,
28314 IX86_BUILTIN_DIVPS512,
28315 IX86_BUILTIN_DIVSD_ROUND,
28316 IX86_BUILTIN_DIVSS_ROUND,
28317 IX86_BUILTIN_EXPANDPD512,
28318 IX86_BUILTIN_EXPANDPD512Z,
28319 IX86_BUILTIN_EXPANDPDLOAD512,
28320 IX86_BUILTIN_EXPANDPDLOAD512Z,
28321 IX86_BUILTIN_EXPANDPS512,
28322 IX86_BUILTIN_EXPANDPS512Z,
28323 IX86_BUILTIN_EXPANDPSLOAD512,
28324 IX86_BUILTIN_EXPANDPSLOAD512Z,
28325 IX86_BUILTIN_EXTRACTF32X4,
28326 IX86_BUILTIN_EXTRACTF64X4,
28327 IX86_BUILTIN_EXTRACTI32X4,
28328 IX86_BUILTIN_EXTRACTI64X4,
28329 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28330 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28331 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28332 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28333 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28334 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28335 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28336 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28337 IX86_BUILTIN_GETEXPPD512,
28338 IX86_BUILTIN_GETEXPPS512,
28339 IX86_BUILTIN_GETEXPSD128,
28340 IX86_BUILTIN_GETEXPSS128,
28341 IX86_BUILTIN_GETMANTPD512,
28342 IX86_BUILTIN_GETMANTPS512,
28343 IX86_BUILTIN_GETMANTSD128,
28344 IX86_BUILTIN_GETMANTSS128,
28345 IX86_BUILTIN_INSERTF32X4,
28346 IX86_BUILTIN_INSERTF64X4,
28347 IX86_BUILTIN_INSERTI32X4,
28348 IX86_BUILTIN_INSERTI64X4,
28349 IX86_BUILTIN_LOADAPD512,
28350 IX86_BUILTIN_LOADAPS512,
28351 IX86_BUILTIN_LOADDQUDI512,
28352 IX86_BUILTIN_LOADDQUSI512,
28353 IX86_BUILTIN_LOADUPD512,
28354 IX86_BUILTIN_LOADUPS512,
28355 IX86_BUILTIN_MAXPD512,
28356 IX86_BUILTIN_MAXPS512,
28357 IX86_BUILTIN_MAXSD_ROUND,
28358 IX86_BUILTIN_MAXSS_ROUND,
28359 IX86_BUILTIN_MINPD512,
28360 IX86_BUILTIN_MINPS512,
28361 IX86_BUILTIN_MINSD_ROUND,
28362 IX86_BUILTIN_MINSS_ROUND,
28363 IX86_BUILTIN_MOVAPD512,
28364 IX86_BUILTIN_MOVAPS512,
28365 IX86_BUILTIN_MOVDDUP512,
28366 IX86_BUILTIN_MOVDQA32LOAD512,
28367 IX86_BUILTIN_MOVDQA32STORE512,
28368 IX86_BUILTIN_MOVDQA32_512,
28369 IX86_BUILTIN_MOVDQA64LOAD512,
28370 IX86_BUILTIN_MOVDQA64STORE512,
28371 IX86_BUILTIN_MOVDQA64_512,
28372 IX86_BUILTIN_MOVNTDQ512,
28373 IX86_BUILTIN_MOVNTDQA512,
28374 IX86_BUILTIN_MOVNTPD512,
28375 IX86_BUILTIN_MOVNTPS512,
28376 IX86_BUILTIN_MOVSHDUP512,
28377 IX86_BUILTIN_MOVSLDUP512,
28378 IX86_BUILTIN_MULPD512,
28379 IX86_BUILTIN_MULPS512,
28380 IX86_BUILTIN_MULSD_ROUND,
28381 IX86_BUILTIN_MULSS_ROUND,
28382 IX86_BUILTIN_PABSD512,
28383 IX86_BUILTIN_PABSQ512,
28384 IX86_BUILTIN_PADDD512,
28385 IX86_BUILTIN_PADDQ512,
28386 IX86_BUILTIN_PANDD512,
28387 IX86_BUILTIN_PANDND512,
28388 IX86_BUILTIN_PANDNQ512,
28389 IX86_BUILTIN_PANDQ512,
28390 IX86_BUILTIN_PBROADCASTD512,
28391 IX86_BUILTIN_PBROADCASTD512_GPR,
28392 IX86_BUILTIN_PBROADCASTMB512,
28393 IX86_BUILTIN_PBROADCASTMW512,
28394 IX86_BUILTIN_PBROADCASTQ512,
28395 IX86_BUILTIN_PBROADCASTQ512_GPR,
28396 IX86_BUILTIN_PBROADCASTQ512_MEM,
28397 IX86_BUILTIN_PCMPEQD512_MASK,
28398 IX86_BUILTIN_PCMPEQQ512_MASK,
28399 IX86_BUILTIN_PCMPGTD512_MASK,
28400 IX86_BUILTIN_PCMPGTQ512_MASK,
28401 IX86_BUILTIN_PCOMPRESSD512,
28402 IX86_BUILTIN_PCOMPRESSDSTORE512,
28403 IX86_BUILTIN_PCOMPRESSQ512,
28404 IX86_BUILTIN_PCOMPRESSQSTORE512,
28405 IX86_BUILTIN_PEXPANDD512,
28406 IX86_BUILTIN_PEXPANDD512Z,
28407 IX86_BUILTIN_PEXPANDDLOAD512,
28408 IX86_BUILTIN_PEXPANDDLOAD512Z,
28409 IX86_BUILTIN_PEXPANDQ512,
28410 IX86_BUILTIN_PEXPANDQ512Z,
28411 IX86_BUILTIN_PEXPANDQLOAD512,
28412 IX86_BUILTIN_PEXPANDQLOAD512Z,
28413 IX86_BUILTIN_PMAXSD512,
28414 IX86_BUILTIN_PMAXSQ512,
28415 IX86_BUILTIN_PMAXUD512,
28416 IX86_BUILTIN_PMAXUQ512,
28417 IX86_BUILTIN_PMINSD512,
28418 IX86_BUILTIN_PMINSQ512,
28419 IX86_BUILTIN_PMINUD512,
28420 IX86_BUILTIN_PMINUQ512,
28421 IX86_BUILTIN_PMOVDB512,
28422 IX86_BUILTIN_PMOVDB512_MEM,
28423 IX86_BUILTIN_PMOVDW512,
28424 IX86_BUILTIN_PMOVDW512_MEM,
28425 IX86_BUILTIN_PMOVQB512,
28426 IX86_BUILTIN_PMOVQB512_MEM,
28427 IX86_BUILTIN_PMOVQD512,
28428 IX86_BUILTIN_PMOVQD512_MEM,
28429 IX86_BUILTIN_PMOVQW512,
28430 IX86_BUILTIN_PMOVQW512_MEM,
28431 IX86_BUILTIN_PMOVSDB512,
28432 IX86_BUILTIN_PMOVSDB512_MEM,
28433 IX86_BUILTIN_PMOVSDW512,
28434 IX86_BUILTIN_PMOVSDW512_MEM,
28435 IX86_BUILTIN_PMOVSQB512,
28436 IX86_BUILTIN_PMOVSQB512_MEM,
28437 IX86_BUILTIN_PMOVSQD512,
28438 IX86_BUILTIN_PMOVSQD512_MEM,
28439 IX86_BUILTIN_PMOVSQW512,
28440 IX86_BUILTIN_PMOVSQW512_MEM,
28441 IX86_BUILTIN_PMOVSXBD512,
28442 IX86_BUILTIN_PMOVSXBQ512,
28443 IX86_BUILTIN_PMOVSXDQ512,
28444 IX86_BUILTIN_PMOVSXWD512,
28445 IX86_BUILTIN_PMOVSXWQ512,
28446 IX86_BUILTIN_PMOVUSDB512,
28447 IX86_BUILTIN_PMOVUSDB512_MEM,
28448 IX86_BUILTIN_PMOVUSDW512,
28449 IX86_BUILTIN_PMOVUSDW512_MEM,
28450 IX86_BUILTIN_PMOVUSQB512,
28451 IX86_BUILTIN_PMOVUSQB512_MEM,
28452 IX86_BUILTIN_PMOVUSQD512,
28453 IX86_BUILTIN_PMOVUSQD512_MEM,
28454 IX86_BUILTIN_PMOVUSQW512,
28455 IX86_BUILTIN_PMOVUSQW512_MEM,
28456 IX86_BUILTIN_PMOVZXBD512,
28457 IX86_BUILTIN_PMOVZXBQ512,
28458 IX86_BUILTIN_PMOVZXDQ512,
28459 IX86_BUILTIN_PMOVZXWD512,
28460 IX86_BUILTIN_PMOVZXWQ512,
28461 IX86_BUILTIN_PMULDQ512,
28462 IX86_BUILTIN_PMULLD512,
28463 IX86_BUILTIN_PMULUDQ512,
28464 IX86_BUILTIN_PORD512,
28465 IX86_BUILTIN_PORQ512,
28466 IX86_BUILTIN_PROLD512,
28467 IX86_BUILTIN_PROLQ512,
28468 IX86_BUILTIN_PROLVD512,
28469 IX86_BUILTIN_PROLVQ512,
28470 IX86_BUILTIN_PRORD512,
28471 IX86_BUILTIN_PRORQ512,
28472 IX86_BUILTIN_PRORVD512,
28473 IX86_BUILTIN_PRORVQ512,
28474 IX86_BUILTIN_PSHUFD512,
28475 IX86_BUILTIN_PSLLD512,
28476 IX86_BUILTIN_PSLLDI512,
28477 IX86_BUILTIN_PSLLQ512,
28478 IX86_BUILTIN_PSLLQI512,
28479 IX86_BUILTIN_PSLLVV16SI,
28480 IX86_BUILTIN_PSLLVV8DI,
28481 IX86_BUILTIN_PSRAD512,
28482 IX86_BUILTIN_PSRADI512,
28483 IX86_BUILTIN_PSRAQ512,
28484 IX86_BUILTIN_PSRAQI512,
28485 IX86_BUILTIN_PSRAVV16SI,
28486 IX86_BUILTIN_PSRAVV8DI,
28487 IX86_BUILTIN_PSRLD512,
28488 IX86_BUILTIN_PSRLDI512,
28489 IX86_BUILTIN_PSRLQ512,
28490 IX86_BUILTIN_PSRLQI512,
28491 IX86_BUILTIN_PSRLVV16SI,
28492 IX86_BUILTIN_PSRLVV8DI,
28493 IX86_BUILTIN_PSUBD512,
28494 IX86_BUILTIN_PSUBQ512,
28495 IX86_BUILTIN_PTESTMD512,
28496 IX86_BUILTIN_PTESTMQ512,
28497 IX86_BUILTIN_PTESTNMD512,
28498 IX86_BUILTIN_PTESTNMQ512,
28499 IX86_BUILTIN_PUNPCKHDQ512,
28500 IX86_BUILTIN_PUNPCKHQDQ512,
28501 IX86_BUILTIN_PUNPCKLDQ512,
28502 IX86_BUILTIN_PUNPCKLQDQ512,
28503 IX86_BUILTIN_PXORD512,
28504 IX86_BUILTIN_PXORQ512,
28505 IX86_BUILTIN_RCP14PD512,
28506 IX86_BUILTIN_RCP14PS512,
28507 IX86_BUILTIN_RCP14SD,
28508 IX86_BUILTIN_RCP14SS,
28509 IX86_BUILTIN_RNDSCALEPD,
28510 IX86_BUILTIN_RNDSCALEPS,
28511 IX86_BUILTIN_RNDSCALESD,
28512 IX86_BUILTIN_RNDSCALESS,
28513 IX86_BUILTIN_RSQRT14PD512,
28514 IX86_BUILTIN_RSQRT14PS512,
28515 IX86_BUILTIN_RSQRT14SD,
28516 IX86_BUILTIN_RSQRT14SS,
28517 IX86_BUILTIN_SCALEFPD512,
28518 IX86_BUILTIN_SCALEFPS512,
28519 IX86_BUILTIN_SCALEFSD,
28520 IX86_BUILTIN_SCALEFSS,
28521 IX86_BUILTIN_SHUFPD512,
28522 IX86_BUILTIN_SHUFPS512,
28523 IX86_BUILTIN_SHUF_F32x4,
28524 IX86_BUILTIN_SHUF_F64x2,
28525 IX86_BUILTIN_SHUF_I32x4,
28526 IX86_BUILTIN_SHUF_I64x2,
28527 IX86_BUILTIN_SQRTPD512,
28528 IX86_BUILTIN_SQRTPD512_MASK,
28529 IX86_BUILTIN_SQRTPS512_MASK,
28530 IX86_BUILTIN_SQRTPS_NR512,
28531 IX86_BUILTIN_SQRTSD_ROUND,
28532 IX86_BUILTIN_SQRTSS_ROUND,
28533 IX86_BUILTIN_STOREAPD512,
28534 IX86_BUILTIN_STOREAPS512,
28535 IX86_BUILTIN_STOREDQUDI512,
28536 IX86_BUILTIN_STOREDQUSI512,
28537 IX86_BUILTIN_STOREUPD512,
28538 IX86_BUILTIN_STOREUPS512,
28539 IX86_BUILTIN_SUBPD512,
28540 IX86_BUILTIN_SUBPS512,
28541 IX86_BUILTIN_SUBSD_ROUND,
28542 IX86_BUILTIN_SUBSS_ROUND,
28543 IX86_BUILTIN_UCMPD512,
28544 IX86_BUILTIN_UCMPQ512,
28545 IX86_BUILTIN_UNPCKHPD512,
28546 IX86_BUILTIN_UNPCKHPS512,
28547 IX86_BUILTIN_UNPCKLPD512,
28548 IX86_BUILTIN_UNPCKLPS512,
28549 IX86_BUILTIN_VCVTSD2SI32,
28550 IX86_BUILTIN_VCVTSD2SI64,
28551 IX86_BUILTIN_VCVTSD2USI32,
28552 IX86_BUILTIN_VCVTSD2USI64,
28553 IX86_BUILTIN_VCVTSS2SI32,
28554 IX86_BUILTIN_VCVTSS2SI64,
28555 IX86_BUILTIN_VCVTSS2USI32,
28556 IX86_BUILTIN_VCVTSS2USI64,
28557 IX86_BUILTIN_VCVTTSD2SI32,
28558 IX86_BUILTIN_VCVTTSD2SI64,
28559 IX86_BUILTIN_VCVTTSD2USI32,
28560 IX86_BUILTIN_VCVTTSD2USI64,
28561 IX86_BUILTIN_VCVTTSS2SI32,
28562 IX86_BUILTIN_VCVTTSS2SI64,
28563 IX86_BUILTIN_VCVTTSS2USI32,
28564 IX86_BUILTIN_VCVTTSS2USI64,
28565 IX86_BUILTIN_VFMADDPD512_MASK,
28566 IX86_BUILTIN_VFMADDPD512_MASK3,
28567 IX86_BUILTIN_VFMADDPD512_MASKZ,
28568 IX86_BUILTIN_VFMADDPS512_MASK,
28569 IX86_BUILTIN_VFMADDPS512_MASK3,
28570 IX86_BUILTIN_VFMADDPS512_MASKZ,
28571 IX86_BUILTIN_VFMADDSD3_ROUND,
28572 IX86_BUILTIN_VFMADDSS3_ROUND,
28573 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28574 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28575 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28576 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28577 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28578 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28579 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28580 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28581 IX86_BUILTIN_VFMSUBPD512_MASK3,
28582 IX86_BUILTIN_VFMSUBPS512_MASK3,
28583 IX86_BUILTIN_VFMSUBSD3_MASK3,
28584 IX86_BUILTIN_VFMSUBSS3_MASK3,
28585 IX86_BUILTIN_VFNMADDPD512_MASK,
28586 IX86_BUILTIN_VFNMADDPS512_MASK,
28587 IX86_BUILTIN_VFNMSUBPD512_MASK,
28588 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28589 IX86_BUILTIN_VFNMSUBPS512_MASK,
28590 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28591 IX86_BUILTIN_VPCLZCNTD512,
28592 IX86_BUILTIN_VPCLZCNTQ512,
28593 IX86_BUILTIN_VPCONFLICTD512,
28594 IX86_BUILTIN_VPCONFLICTQ512,
28595 IX86_BUILTIN_VPERMDF512,
28596 IX86_BUILTIN_VPERMDI512,
28597 IX86_BUILTIN_VPERMI2VARD512,
28598 IX86_BUILTIN_VPERMI2VARPD512,
28599 IX86_BUILTIN_VPERMI2VARPS512,
28600 IX86_BUILTIN_VPERMI2VARQ512,
28601 IX86_BUILTIN_VPERMILPD512,
28602 IX86_BUILTIN_VPERMILPS512,
28603 IX86_BUILTIN_VPERMILVARPD512,
28604 IX86_BUILTIN_VPERMILVARPS512,
28605 IX86_BUILTIN_VPERMT2VARD512,
28606 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28607 IX86_BUILTIN_VPERMT2VARPD512,
28608 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28609 IX86_BUILTIN_VPERMT2VARPS512,
28610 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28611 IX86_BUILTIN_VPERMT2VARQ512,
28612 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28613 IX86_BUILTIN_VPERMVARDF512,
28614 IX86_BUILTIN_VPERMVARDI512,
28615 IX86_BUILTIN_VPERMVARSF512,
28616 IX86_BUILTIN_VPERMVARSI512,
28617 IX86_BUILTIN_VTERNLOGD512_MASK,
28618 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28619 IX86_BUILTIN_VTERNLOGQ512_MASK,
28620 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28622 /* Mask arithmetic operations */
28623 IX86_BUILTIN_KAND16,
28624 IX86_BUILTIN_KANDN16,
28625 IX86_BUILTIN_KNOT16,
28626 IX86_BUILTIN_KOR16,
28627 IX86_BUILTIN_KORTESTC16,
28628 IX86_BUILTIN_KORTESTZ16,
28629 IX86_BUILTIN_KUNPCKBW,
28630 IX86_BUILTIN_KXNOR16,
28631 IX86_BUILTIN_KXOR16,
28632 IX86_BUILTIN_KMOV16,
28634 /* AVX512VL. */
28635 IX86_BUILTIN_PMOVUSQD256_MEM,
28636 IX86_BUILTIN_PMOVUSQD128_MEM,
28637 IX86_BUILTIN_PMOVSQD256_MEM,
28638 IX86_BUILTIN_PMOVSQD128_MEM,
28639 IX86_BUILTIN_PMOVQD256_MEM,
28640 IX86_BUILTIN_PMOVQD128_MEM,
28641 IX86_BUILTIN_PMOVUSQW256_MEM,
28642 IX86_BUILTIN_PMOVUSQW128_MEM,
28643 IX86_BUILTIN_PMOVSQW256_MEM,
28644 IX86_BUILTIN_PMOVSQW128_MEM,
28645 IX86_BUILTIN_PMOVQW256_MEM,
28646 IX86_BUILTIN_PMOVQW128_MEM,
28647 IX86_BUILTIN_PMOVUSQB256_MEM,
28648 IX86_BUILTIN_PMOVUSQB128_MEM,
28649 IX86_BUILTIN_PMOVSQB256_MEM,
28650 IX86_BUILTIN_PMOVSQB128_MEM,
28651 IX86_BUILTIN_PMOVQB256_MEM,
28652 IX86_BUILTIN_PMOVQB128_MEM,
28653 IX86_BUILTIN_PMOVUSDW256_MEM,
28654 IX86_BUILTIN_PMOVUSDW128_MEM,
28655 IX86_BUILTIN_PMOVSDW256_MEM,
28656 IX86_BUILTIN_PMOVSDW128_MEM,
28657 IX86_BUILTIN_PMOVDW256_MEM,
28658 IX86_BUILTIN_PMOVDW128_MEM,
28659 IX86_BUILTIN_PMOVUSDB256_MEM,
28660 IX86_BUILTIN_PMOVUSDB128_MEM,
28661 IX86_BUILTIN_PMOVSDB256_MEM,
28662 IX86_BUILTIN_PMOVSDB128_MEM,
28663 IX86_BUILTIN_PMOVDB256_MEM,
28664 IX86_BUILTIN_PMOVDB128_MEM,
28665 IX86_BUILTIN_MOVDQA64LOAD256_MASK,
28666 IX86_BUILTIN_MOVDQA64LOAD128_MASK,
28667 IX86_BUILTIN_MOVDQA32LOAD256_MASK,
28668 IX86_BUILTIN_MOVDQA32LOAD128_MASK,
28669 IX86_BUILTIN_MOVDQA64STORE256_MASK,
28670 IX86_BUILTIN_MOVDQA64STORE128_MASK,
28671 IX86_BUILTIN_MOVDQA32STORE256_MASK,
28672 IX86_BUILTIN_MOVDQA32STORE128_MASK,
28673 IX86_BUILTIN_LOADAPD256_MASK,
28674 IX86_BUILTIN_LOADAPD128_MASK,
28675 IX86_BUILTIN_LOADAPS256_MASK,
28676 IX86_BUILTIN_LOADAPS128_MASK,
28677 IX86_BUILTIN_STOREAPD256_MASK,
28678 IX86_BUILTIN_STOREAPD128_MASK,
28679 IX86_BUILTIN_STOREAPS256_MASK,
28680 IX86_BUILTIN_STOREAPS128_MASK,
28681 IX86_BUILTIN_LOADUPD256_MASK,
28682 IX86_BUILTIN_LOADUPD128_MASK,
28683 IX86_BUILTIN_LOADUPS256_MASK,
28684 IX86_BUILTIN_LOADUPS128_MASK,
28685 IX86_BUILTIN_STOREUPD256_MASK,
28686 IX86_BUILTIN_STOREUPD128_MASK,
28687 IX86_BUILTIN_STOREUPS256_MASK,
28688 IX86_BUILTIN_STOREUPS128_MASK,
28689 IX86_BUILTIN_LOADDQUDI256_MASK,
28690 IX86_BUILTIN_LOADDQUDI128_MASK,
28691 IX86_BUILTIN_LOADDQUSI256_MASK,
28692 IX86_BUILTIN_LOADDQUSI128_MASK,
28693 IX86_BUILTIN_LOADDQUHI256_MASK,
28694 IX86_BUILTIN_LOADDQUHI128_MASK,
28695 IX86_BUILTIN_LOADDQUQI256_MASK,
28696 IX86_BUILTIN_LOADDQUQI128_MASK,
28697 IX86_BUILTIN_STOREDQUDI256_MASK,
28698 IX86_BUILTIN_STOREDQUDI128_MASK,
28699 IX86_BUILTIN_STOREDQUSI256_MASK,
28700 IX86_BUILTIN_STOREDQUSI128_MASK,
28701 IX86_BUILTIN_STOREDQUHI256_MASK,
28702 IX86_BUILTIN_STOREDQUHI128_MASK,
28703 IX86_BUILTIN_STOREDQUQI256_MASK,
28704 IX86_BUILTIN_STOREDQUQI128_MASK,
28705 IX86_BUILTIN_COMPRESSPDSTORE256,
28706 IX86_BUILTIN_COMPRESSPDSTORE128,
28707 IX86_BUILTIN_COMPRESSPSSTORE256,
28708 IX86_BUILTIN_COMPRESSPSSTORE128,
28709 IX86_BUILTIN_PCOMPRESSQSTORE256,
28710 IX86_BUILTIN_PCOMPRESSQSTORE128,
28711 IX86_BUILTIN_PCOMPRESSDSTORE256,
28712 IX86_BUILTIN_PCOMPRESSDSTORE128,
28713 IX86_BUILTIN_EXPANDPDLOAD256,
28714 IX86_BUILTIN_EXPANDPDLOAD128,
28715 IX86_BUILTIN_EXPANDPSLOAD256,
28716 IX86_BUILTIN_EXPANDPSLOAD128,
28717 IX86_BUILTIN_PEXPANDQLOAD256,
28718 IX86_BUILTIN_PEXPANDQLOAD128,
28719 IX86_BUILTIN_PEXPANDDLOAD256,
28720 IX86_BUILTIN_PEXPANDDLOAD128,
28721 IX86_BUILTIN_EXPANDPDLOAD256Z,
28722 IX86_BUILTIN_EXPANDPDLOAD128Z,
28723 IX86_BUILTIN_EXPANDPSLOAD256Z,
28724 IX86_BUILTIN_EXPANDPSLOAD128Z,
28725 IX86_BUILTIN_PEXPANDQLOAD256Z,
28726 IX86_BUILTIN_PEXPANDQLOAD128Z,
28727 IX86_BUILTIN_PEXPANDDLOAD256Z,
28728 IX86_BUILTIN_PEXPANDDLOAD128Z,
28729 IX86_BUILTIN_PALIGNR256_MASK,
28730 IX86_BUILTIN_PALIGNR128_MASK,
28731 IX86_BUILTIN_MOVDQA64_256_MASK,
28732 IX86_BUILTIN_MOVDQA64_128_MASK,
28733 IX86_BUILTIN_MOVDQA32_256_MASK,
28734 IX86_BUILTIN_MOVDQA32_128_MASK,
28735 IX86_BUILTIN_MOVAPD256_MASK,
28736 IX86_BUILTIN_MOVAPD128_MASK,
28737 IX86_BUILTIN_MOVAPS256_MASK,
28738 IX86_BUILTIN_MOVAPS128_MASK,
28739 IX86_BUILTIN_MOVDQUHI256_MASK,
28740 IX86_BUILTIN_MOVDQUHI128_MASK,
28741 IX86_BUILTIN_MOVDQUQI256_MASK,
28742 IX86_BUILTIN_MOVDQUQI128_MASK,
28743 IX86_BUILTIN_MINPS128_MASK,
28744 IX86_BUILTIN_MAXPS128_MASK,
28745 IX86_BUILTIN_MINPD128_MASK,
28746 IX86_BUILTIN_MAXPD128_MASK,
28747 IX86_BUILTIN_MAXPD256_MASK,
28748 IX86_BUILTIN_MAXPS256_MASK,
28749 IX86_BUILTIN_MINPD256_MASK,
28750 IX86_BUILTIN_MINPS256_MASK,
28751 IX86_BUILTIN_MULPS128_MASK,
28752 IX86_BUILTIN_DIVPS128_MASK,
28753 IX86_BUILTIN_MULPD128_MASK,
28754 IX86_BUILTIN_DIVPD128_MASK,
28755 IX86_BUILTIN_DIVPD256_MASK,
28756 IX86_BUILTIN_DIVPS256_MASK,
28757 IX86_BUILTIN_MULPD256_MASK,
28758 IX86_BUILTIN_MULPS256_MASK,
28759 IX86_BUILTIN_ADDPD128_MASK,
28760 IX86_BUILTIN_ADDPD256_MASK,
28761 IX86_BUILTIN_ADDPS128_MASK,
28762 IX86_BUILTIN_ADDPS256_MASK,
28763 IX86_BUILTIN_SUBPD128_MASK,
28764 IX86_BUILTIN_SUBPD256_MASK,
28765 IX86_BUILTIN_SUBPS128_MASK,
28766 IX86_BUILTIN_SUBPS256_MASK,
28767 IX86_BUILTIN_XORPD256_MASK,
28768 IX86_BUILTIN_XORPD128_MASK,
28769 IX86_BUILTIN_XORPS256_MASK,
28770 IX86_BUILTIN_XORPS128_MASK,
28771 IX86_BUILTIN_ORPD256_MASK,
28772 IX86_BUILTIN_ORPD128_MASK,
28773 IX86_BUILTIN_ORPS256_MASK,
28774 IX86_BUILTIN_ORPS128_MASK,
28775 IX86_BUILTIN_BROADCASTF32x2_256,
28776 IX86_BUILTIN_BROADCASTI32x2_256,
28777 IX86_BUILTIN_BROADCASTI32x2_128,
28778 IX86_BUILTIN_BROADCASTF64X2_256,
28779 IX86_BUILTIN_BROADCASTI64X2_256,
28780 IX86_BUILTIN_BROADCASTF32X4_256,
28781 IX86_BUILTIN_BROADCASTI32X4_256,
28782 IX86_BUILTIN_EXTRACTF32X4_256,
28783 IX86_BUILTIN_EXTRACTI32X4_256,
28784 IX86_BUILTIN_DBPSADBW256,
28785 IX86_BUILTIN_DBPSADBW128,
28786 IX86_BUILTIN_CVTTPD2QQ256,
28787 IX86_BUILTIN_CVTTPD2QQ128,
28788 IX86_BUILTIN_CVTTPD2UQQ256,
28789 IX86_BUILTIN_CVTTPD2UQQ128,
28790 IX86_BUILTIN_CVTPD2QQ256,
28791 IX86_BUILTIN_CVTPD2QQ128,
28792 IX86_BUILTIN_CVTPD2UQQ256,
28793 IX86_BUILTIN_CVTPD2UQQ128,
28794 IX86_BUILTIN_CVTPD2UDQ256_MASK,
28795 IX86_BUILTIN_CVTPD2UDQ128_MASK,
28796 IX86_BUILTIN_CVTTPS2QQ256,
28797 IX86_BUILTIN_CVTTPS2QQ128,
28798 IX86_BUILTIN_CVTTPS2UQQ256,
28799 IX86_BUILTIN_CVTTPS2UQQ128,
28800 IX86_BUILTIN_CVTTPS2DQ256_MASK,
28801 IX86_BUILTIN_CVTTPS2DQ128_MASK,
28802 IX86_BUILTIN_CVTTPS2UDQ256,
28803 IX86_BUILTIN_CVTTPS2UDQ128,
28804 IX86_BUILTIN_CVTTPD2DQ256_MASK,
28805 IX86_BUILTIN_CVTTPD2DQ128_MASK,
28806 IX86_BUILTIN_CVTTPD2UDQ256_MASK,
28807 IX86_BUILTIN_CVTTPD2UDQ128_MASK,
28808 IX86_BUILTIN_CVTPD2DQ256_MASK,
28809 IX86_BUILTIN_CVTPD2DQ128_MASK,
28810 IX86_BUILTIN_CVTDQ2PD256_MASK,
28811 IX86_BUILTIN_CVTDQ2PD128_MASK,
28812 IX86_BUILTIN_CVTUDQ2PD256_MASK,
28813 IX86_BUILTIN_CVTUDQ2PD128_MASK,
28814 IX86_BUILTIN_CVTDQ2PS256_MASK,
28815 IX86_BUILTIN_CVTDQ2PS128_MASK,
28816 IX86_BUILTIN_CVTUDQ2PS256_MASK,
28817 IX86_BUILTIN_CVTUDQ2PS128_MASK,
28818 IX86_BUILTIN_CVTPS2PD256_MASK,
28819 IX86_BUILTIN_CVTPS2PD128_MASK,
28820 IX86_BUILTIN_PBROADCASTB256_MASK,
28821 IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
28822 IX86_BUILTIN_PBROADCASTB128_MASK,
28823 IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
28824 IX86_BUILTIN_PBROADCASTW256_MASK,
28825 IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
28826 IX86_BUILTIN_PBROADCASTW128_MASK,
28827 IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
28828 IX86_BUILTIN_PBROADCASTD256_MASK,
28829 IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
28830 IX86_BUILTIN_PBROADCASTD128_MASK,
28831 IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
28832 IX86_BUILTIN_PBROADCASTQ256_MASK,
28833 IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
28834 IX86_BUILTIN_PBROADCASTQ256_MEM_MASK,
28835 IX86_BUILTIN_PBROADCASTQ128_MASK,
28836 IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
28837 IX86_BUILTIN_PBROADCASTQ128_MEM_MASK,
28838 IX86_BUILTIN_BROADCASTSS256,
28839 IX86_BUILTIN_BROADCASTSS128,
28840 IX86_BUILTIN_BROADCASTSD256,
28841 IX86_BUILTIN_EXTRACTF64X2_256,
28842 IX86_BUILTIN_EXTRACTI64X2_256,
28843 IX86_BUILTIN_INSERTF32X4_256,
28844 IX86_BUILTIN_INSERTI32X4_256,
28845 IX86_BUILTIN_PMOVSXBW256_MASK,
28846 IX86_BUILTIN_PMOVSXBW128_MASK,
28847 IX86_BUILTIN_PMOVSXBD256_MASK,
28848 IX86_BUILTIN_PMOVSXBD128_MASK,
28849 IX86_BUILTIN_PMOVSXBQ256_MASK,
28850 IX86_BUILTIN_PMOVSXBQ128_MASK,
28851 IX86_BUILTIN_PMOVSXWD256_MASK,
28852 IX86_BUILTIN_PMOVSXWD128_MASK,
28853 IX86_BUILTIN_PMOVSXWQ256_MASK,
28854 IX86_BUILTIN_PMOVSXWQ128_MASK,
28855 IX86_BUILTIN_PMOVSXDQ256_MASK,
28856 IX86_BUILTIN_PMOVSXDQ128_MASK,
28857 IX86_BUILTIN_PMOVZXBW256_MASK,
28858 IX86_BUILTIN_PMOVZXBW128_MASK,
28859 IX86_BUILTIN_PMOVZXBD256_MASK,
28860 IX86_BUILTIN_PMOVZXBD128_MASK,
28861 IX86_BUILTIN_PMOVZXBQ256_MASK,
28862 IX86_BUILTIN_PMOVZXBQ128_MASK,
28863 IX86_BUILTIN_PMOVZXWD256_MASK,
28864 IX86_BUILTIN_PMOVZXWD128_MASK,
28865 IX86_BUILTIN_PMOVZXWQ256_MASK,
28866 IX86_BUILTIN_PMOVZXWQ128_MASK,
28867 IX86_BUILTIN_PMOVZXDQ256_MASK,
28868 IX86_BUILTIN_PMOVZXDQ128_MASK,
28869 IX86_BUILTIN_REDUCEPD256_MASK,
28870 IX86_BUILTIN_REDUCEPD128_MASK,
28871 IX86_BUILTIN_REDUCEPS256_MASK,
28872 IX86_BUILTIN_REDUCEPS128_MASK,
28873 IX86_BUILTIN_REDUCESD_MASK,
28874 IX86_BUILTIN_REDUCESS_MASK,
28875 IX86_BUILTIN_VPERMVARHI256_MASK,
28876 IX86_BUILTIN_VPERMVARHI128_MASK,
28877 IX86_BUILTIN_VPERMT2VARHI256,
28878 IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
28879 IX86_BUILTIN_VPERMT2VARHI128,
28880 IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
28881 IX86_BUILTIN_VPERMI2VARHI256,
28882 IX86_BUILTIN_VPERMI2VARHI128,
28883 IX86_BUILTIN_RCP14PD256,
28884 IX86_BUILTIN_RCP14PD128,
28885 IX86_BUILTIN_RCP14PS256,
28886 IX86_BUILTIN_RCP14PS128,
28887 IX86_BUILTIN_RSQRT14PD256_MASK,
28888 IX86_BUILTIN_RSQRT14PD128_MASK,
28889 IX86_BUILTIN_RSQRT14PS256_MASK,
28890 IX86_BUILTIN_RSQRT14PS128_MASK,
28891 IX86_BUILTIN_SQRTPD256_MASK,
28892 IX86_BUILTIN_SQRTPD128_MASK,
28893 IX86_BUILTIN_SQRTPS256_MASK,
28894 IX86_BUILTIN_SQRTPS128_MASK,
28895 IX86_BUILTIN_PADDB128_MASK,
28896 IX86_BUILTIN_PADDW128_MASK,
28897 IX86_BUILTIN_PADDD128_MASK,
28898 IX86_BUILTIN_PADDQ128_MASK,
28899 IX86_BUILTIN_PSUBB128_MASK,
28900 IX86_BUILTIN_PSUBW128_MASK,
28901 IX86_BUILTIN_PSUBD128_MASK,
28902 IX86_BUILTIN_PSUBQ128_MASK,
28903 IX86_BUILTIN_PADDSB128_MASK,
28904 IX86_BUILTIN_PADDSW128_MASK,
28905 IX86_BUILTIN_PSUBSB128_MASK,
28906 IX86_BUILTIN_PSUBSW128_MASK,
28907 IX86_BUILTIN_PADDUSB128_MASK,
28908 IX86_BUILTIN_PADDUSW128_MASK,
28909 IX86_BUILTIN_PSUBUSB128_MASK,
28910 IX86_BUILTIN_PSUBUSW128_MASK,
28911 IX86_BUILTIN_PADDB256_MASK,
28912 IX86_BUILTIN_PADDW256_MASK,
28913 IX86_BUILTIN_PADDD256_MASK,
28914 IX86_BUILTIN_PADDQ256_MASK,
28915 IX86_BUILTIN_PADDSB256_MASK,
28916 IX86_BUILTIN_PADDSW256_MASK,
28917 IX86_BUILTIN_PADDUSB256_MASK,
28918 IX86_BUILTIN_PADDUSW256_MASK,
28919 IX86_BUILTIN_PSUBB256_MASK,
28920 IX86_BUILTIN_PSUBW256_MASK,
28921 IX86_BUILTIN_PSUBD256_MASK,
28922 IX86_BUILTIN_PSUBQ256_MASK,
28923 IX86_BUILTIN_PSUBSB256_MASK,
28924 IX86_BUILTIN_PSUBSW256_MASK,
28925 IX86_BUILTIN_PSUBUSB256_MASK,
28926 IX86_BUILTIN_PSUBUSW256_MASK,
28927 IX86_BUILTIN_SHUF_F64x2_256,
28928 IX86_BUILTIN_SHUF_I64x2_256,
28929 IX86_BUILTIN_SHUF_I32x4_256,
28930 IX86_BUILTIN_SHUF_F32x4_256,
28931 IX86_BUILTIN_PMOVWB128,
28932 IX86_BUILTIN_PMOVWB256,
28933 IX86_BUILTIN_PMOVSWB128,
28934 IX86_BUILTIN_PMOVSWB256,
28935 IX86_BUILTIN_PMOVUSWB128,
28936 IX86_BUILTIN_PMOVUSWB256,
28937 IX86_BUILTIN_PMOVDB128,
28938 IX86_BUILTIN_PMOVDB256,
28939 IX86_BUILTIN_PMOVSDB128,
28940 IX86_BUILTIN_PMOVSDB256,
28941 IX86_BUILTIN_PMOVUSDB128,
28942 IX86_BUILTIN_PMOVUSDB256,
28943 IX86_BUILTIN_PMOVDW128,
28944 IX86_BUILTIN_PMOVDW256,
28945 IX86_BUILTIN_PMOVSDW128,
28946 IX86_BUILTIN_PMOVSDW256,
28947 IX86_BUILTIN_PMOVUSDW128,
28948 IX86_BUILTIN_PMOVUSDW256,
28949 IX86_BUILTIN_PMOVQB128,
28950 IX86_BUILTIN_PMOVQB256,
28951 IX86_BUILTIN_PMOVSQB128,
28952 IX86_BUILTIN_PMOVSQB256,
28953 IX86_BUILTIN_PMOVUSQB128,
28954 IX86_BUILTIN_PMOVUSQB256,
28955 IX86_BUILTIN_PMOVQW128,
28956 IX86_BUILTIN_PMOVQW256,
28957 IX86_BUILTIN_PMOVSQW128,
28958 IX86_BUILTIN_PMOVSQW256,
28959 IX86_BUILTIN_PMOVUSQW128,
28960 IX86_BUILTIN_PMOVUSQW256,
28961 IX86_BUILTIN_PMOVQD128,
28962 IX86_BUILTIN_PMOVQD256,
28963 IX86_BUILTIN_PMOVSQD128,
28964 IX86_BUILTIN_PMOVSQD256,
28965 IX86_BUILTIN_PMOVUSQD128,
28966 IX86_BUILTIN_PMOVUSQD256,
28967 IX86_BUILTIN_RANGEPD256,
28968 IX86_BUILTIN_RANGEPD128,
28969 IX86_BUILTIN_RANGEPS256,
28970 IX86_BUILTIN_RANGEPS128,
28971 IX86_BUILTIN_GETEXPPS256,
28972 IX86_BUILTIN_GETEXPPD256,
28973 IX86_BUILTIN_GETEXPPS128,
28974 IX86_BUILTIN_GETEXPPD128,
28975 IX86_BUILTIN_FIXUPIMMPD256,
28976 IX86_BUILTIN_FIXUPIMMPD256_MASK,
28977 IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
28978 IX86_BUILTIN_FIXUPIMMPS256,
28979 IX86_BUILTIN_FIXUPIMMPS256_MASK,
28980 IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
28981 IX86_BUILTIN_FIXUPIMMPD128,
28982 IX86_BUILTIN_FIXUPIMMPD128_MASK,
28983 IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
28984 IX86_BUILTIN_FIXUPIMMPS128,
28985 IX86_BUILTIN_FIXUPIMMPS128_MASK,
28986 IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
28987 IX86_BUILTIN_PABSQ256,
28988 IX86_BUILTIN_PABSQ128,
28989 IX86_BUILTIN_PABSD256_MASK,
28990 IX86_BUILTIN_PABSD128_MASK,
28991 IX86_BUILTIN_PMULHRSW256_MASK,
28992 IX86_BUILTIN_PMULHRSW128_MASK,
28993 IX86_BUILTIN_PMULHUW128_MASK,
28994 IX86_BUILTIN_PMULHUW256_MASK,
28995 IX86_BUILTIN_PMULHW256_MASK,
28996 IX86_BUILTIN_PMULHW128_MASK,
28997 IX86_BUILTIN_PMULLW256_MASK,
28998 IX86_BUILTIN_PMULLW128_MASK,
28999 IX86_BUILTIN_PMULLQ256,
29000 IX86_BUILTIN_PMULLQ128,
29001 IX86_BUILTIN_ANDPD256_MASK,
29002 IX86_BUILTIN_ANDPD128_MASK,
29003 IX86_BUILTIN_ANDPS256_MASK,
29004 IX86_BUILTIN_ANDPS128_MASK,
29005 IX86_BUILTIN_ANDNPD256_MASK,
29006 IX86_BUILTIN_ANDNPD128_MASK,
29007 IX86_BUILTIN_ANDNPS256_MASK,
29008 IX86_BUILTIN_ANDNPS128_MASK,
29009 IX86_BUILTIN_PSLLWI128_MASK,
29010 IX86_BUILTIN_PSLLDI128_MASK,
29011 IX86_BUILTIN_PSLLQI128_MASK,
29012 IX86_BUILTIN_PSLLW128_MASK,
29013 IX86_BUILTIN_PSLLD128_MASK,
29014 IX86_BUILTIN_PSLLQ128_MASK,
29015 IX86_BUILTIN_PSLLWI256_MASK,
29016 IX86_BUILTIN_PSLLW256_MASK,
29017 IX86_BUILTIN_PSLLDI256_MASK,
29018 IX86_BUILTIN_PSLLD256_MASK,
29019 IX86_BUILTIN_PSLLQI256_MASK,
29020 IX86_BUILTIN_PSLLQ256_MASK,
29021 IX86_BUILTIN_PSRADI128_MASK,
29022 IX86_BUILTIN_PSRAD128_MASK,
29023 IX86_BUILTIN_PSRADI256_MASK,
29024 IX86_BUILTIN_PSRAD256_MASK,
29025 IX86_BUILTIN_PSRAQI128_MASK,
29026 IX86_BUILTIN_PSRAQ128_MASK,
29027 IX86_BUILTIN_PSRAQI256_MASK,
29028 IX86_BUILTIN_PSRAQ256_MASK,
29029 IX86_BUILTIN_PANDD256,
29030 IX86_BUILTIN_PANDD128,
29031 IX86_BUILTIN_PSRLDI128_MASK,
29032 IX86_BUILTIN_PSRLD128_MASK,
29033 IX86_BUILTIN_PSRLDI256_MASK,
29034 IX86_BUILTIN_PSRLD256_MASK,
29035 IX86_BUILTIN_PSRLQI128_MASK,
29036 IX86_BUILTIN_PSRLQ128_MASK,
29037 IX86_BUILTIN_PSRLQI256_MASK,
29038 IX86_BUILTIN_PSRLQ256_MASK,
29039 IX86_BUILTIN_PANDQ256,
29040 IX86_BUILTIN_PANDQ128,
29041 IX86_BUILTIN_PANDND256,
29042 IX86_BUILTIN_PANDND128,
29043 IX86_BUILTIN_PANDNQ256,
29044 IX86_BUILTIN_PANDNQ128,
29045 IX86_BUILTIN_PORD256,
29046 IX86_BUILTIN_PORD128,
29047 IX86_BUILTIN_PORQ256,
29048 IX86_BUILTIN_PORQ128,
29049 IX86_BUILTIN_PXORD256,
29050 IX86_BUILTIN_PXORD128,
29051 IX86_BUILTIN_PXORQ256,
29052 IX86_BUILTIN_PXORQ128,
29053 IX86_BUILTIN_PACKSSWB256_MASK,
29054 IX86_BUILTIN_PACKSSWB128_MASK,
29055 IX86_BUILTIN_PACKUSWB256_MASK,
29056 IX86_BUILTIN_PACKUSWB128_MASK,
29057 IX86_BUILTIN_RNDSCALEPS256,
29058 IX86_BUILTIN_RNDSCALEPD256,
29059 IX86_BUILTIN_RNDSCALEPS128,
29060 IX86_BUILTIN_RNDSCALEPD128,
29061 IX86_BUILTIN_VTERNLOGQ256_MASK,
29062 IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29063 IX86_BUILTIN_VTERNLOGD256_MASK,
29064 IX86_BUILTIN_VTERNLOGD256_MASKZ,
29065 IX86_BUILTIN_VTERNLOGQ128_MASK,
29066 IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29067 IX86_BUILTIN_VTERNLOGD128_MASK,
29068 IX86_BUILTIN_VTERNLOGD128_MASKZ,
29069 IX86_BUILTIN_SCALEFPD256,
29070 IX86_BUILTIN_SCALEFPS256,
29071 IX86_BUILTIN_SCALEFPD128,
29072 IX86_BUILTIN_SCALEFPS128,
29073 IX86_BUILTIN_VFMADDPD256_MASK,
29074 IX86_BUILTIN_VFMADDPD256_MASK3,
29075 IX86_BUILTIN_VFMADDPD256_MASKZ,
29076 IX86_BUILTIN_VFMADDPD128_MASK,
29077 IX86_BUILTIN_VFMADDPD128_MASK3,
29078 IX86_BUILTIN_VFMADDPD128_MASKZ,
29079 IX86_BUILTIN_VFMADDPS256_MASK,
29080 IX86_BUILTIN_VFMADDPS256_MASK3,
29081 IX86_BUILTIN_VFMADDPS256_MASKZ,
29082 IX86_BUILTIN_VFMADDPS128_MASK,
29083 IX86_BUILTIN_VFMADDPS128_MASK3,
29084 IX86_BUILTIN_VFMADDPS128_MASKZ,
29085 IX86_BUILTIN_VFMSUBPD256_MASK3,
29086 IX86_BUILTIN_VFMSUBPD128_MASK3,
29087 IX86_BUILTIN_VFMSUBPS256_MASK3,
29088 IX86_BUILTIN_VFMSUBPS128_MASK3,
29089 IX86_BUILTIN_VFNMADDPD256_MASK,
29090 IX86_BUILTIN_VFNMADDPD128_MASK,
29091 IX86_BUILTIN_VFNMADDPS256_MASK,
29092 IX86_BUILTIN_VFNMADDPS128_MASK,
29093 IX86_BUILTIN_VFNMSUBPD256_MASK,
29094 IX86_BUILTIN_VFNMSUBPD256_MASK3,
29095 IX86_BUILTIN_VFNMSUBPD128_MASK,
29096 IX86_BUILTIN_VFNMSUBPD128_MASK3,
29097 IX86_BUILTIN_VFNMSUBPS256_MASK,
29098 IX86_BUILTIN_VFNMSUBPS256_MASK3,
29099 IX86_BUILTIN_VFNMSUBPS128_MASK,
29100 IX86_BUILTIN_VFNMSUBPS128_MASK3,
29101 IX86_BUILTIN_VFMADDSUBPD256_MASK,
29102 IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29103 IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29104 IX86_BUILTIN_VFMADDSUBPD128_MASK,
29105 IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29106 IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29107 IX86_BUILTIN_VFMADDSUBPS256_MASK,
29108 IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29109 IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29110 IX86_BUILTIN_VFMADDSUBPS128_MASK,
29111 IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29112 IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29113 IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29114 IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29115 IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29116 IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29117 IX86_BUILTIN_INSERTF64X2_256,
29118 IX86_BUILTIN_INSERTI64X2_256,
29119 IX86_BUILTIN_PSRAVV16HI,
29120 IX86_BUILTIN_PSRAVV8HI,
29121 IX86_BUILTIN_PMADDUBSW256_MASK,
29122 IX86_BUILTIN_PMADDUBSW128_MASK,
29123 IX86_BUILTIN_PMADDWD256_MASK,
29124 IX86_BUILTIN_PMADDWD128_MASK,
29125 IX86_BUILTIN_PSRLVV16HI,
29126 IX86_BUILTIN_PSRLVV8HI,
29127 IX86_BUILTIN_CVTPS2DQ256_MASK,
29128 IX86_BUILTIN_CVTPS2DQ128_MASK,
29129 IX86_BUILTIN_CVTPS2UDQ256,
29130 IX86_BUILTIN_CVTPS2UDQ128,
29131 IX86_BUILTIN_CVTPS2QQ256,
29132 IX86_BUILTIN_CVTPS2QQ128,
29133 IX86_BUILTIN_CVTPS2UQQ256,
29134 IX86_BUILTIN_CVTPS2UQQ128,
29135 IX86_BUILTIN_GETMANTPS256,
29136 IX86_BUILTIN_GETMANTPS128,
29137 IX86_BUILTIN_GETMANTPD256,
29138 IX86_BUILTIN_GETMANTPD128,
29139 IX86_BUILTIN_MOVDDUP256_MASK,
29140 IX86_BUILTIN_MOVDDUP128_MASK,
29141 IX86_BUILTIN_MOVSHDUP256_MASK,
29142 IX86_BUILTIN_MOVSHDUP128_MASK,
29143 IX86_BUILTIN_MOVSLDUP256_MASK,
29144 IX86_BUILTIN_MOVSLDUP128_MASK,
29145 IX86_BUILTIN_CVTQQ2PS256,
29146 IX86_BUILTIN_CVTQQ2PS128,
29147 IX86_BUILTIN_CVTUQQ2PS256,
29148 IX86_BUILTIN_CVTUQQ2PS128,
29149 IX86_BUILTIN_CVTQQ2PD256,
29150 IX86_BUILTIN_CVTQQ2PD128,
29151 IX86_BUILTIN_CVTUQQ2PD256,
29152 IX86_BUILTIN_CVTUQQ2PD128,
29153 IX86_BUILTIN_VPERMT2VARQ256,
29154 IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29155 IX86_BUILTIN_VPERMT2VARD256,
29156 IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29157 IX86_BUILTIN_VPERMI2VARQ256,
29158 IX86_BUILTIN_VPERMI2VARD256,
29159 IX86_BUILTIN_VPERMT2VARPD256,
29160 IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29161 IX86_BUILTIN_VPERMT2VARPS256,
29162 IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29163 IX86_BUILTIN_VPERMI2VARPD256,
29164 IX86_BUILTIN_VPERMI2VARPS256,
29165 IX86_BUILTIN_VPERMT2VARQ128,
29166 IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29167 IX86_BUILTIN_VPERMT2VARD128,
29168 IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29169 IX86_BUILTIN_VPERMI2VARQ128,
29170 IX86_BUILTIN_VPERMI2VARD128,
29171 IX86_BUILTIN_VPERMT2VARPD128,
29172 IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29173 IX86_BUILTIN_VPERMT2VARPS128,
29174 IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29175 IX86_BUILTIN_VPERMI2VARPD128,
29176 IX86_BUILTIN_VPERMI2VARPS128,
29177 IX86_BUILTIN_PSHUFB256_MASK,
29178 IX86_BUILTIN_PSHUFB128_MASK,
29179 IX86_BUILTIN_PSHUFHW256_MASK,
29180 IX86_BUILTIN_PSHUFHW128_MASK,
29181 IX86_BUILTIN_PSHUFLW256_MASK,
29182 IX86_BUILTIN_PSHUFLW128_MASK,
29183 IX86_BUILTIN_PSHUFD256_MASK,
29184 IX86_BUILTIN_PSHUFD128_MASK,
29185 IX86_BUILTIN_SHUFPD256_MASK,
29186 IX86_BUILTIN_SHUFPD128_MASK,
29187 IX86_BUILTIN_SHUFPS256_MASK,
29188 IX86_BUILTIN_SHUFPS128_MASK,
29189 IX86_BUILTIN_PROLVQ256,
29190 IX86_BUILTIN_PROLVQ128,
29191 IX86_BUILTIN_PROLQ256,
29192 IX86_BUILTIN_PROLQ128,
29193 IX86_BUILTIN_PRORVQ256,
29194 IX86_BUILTIN_PRORVQ128,
29195 IX86_BUILTIN_PRORQ256,
29196 IX86_BUILTIN_PRORQ128,
29197 IX86_BUILTIN_PSRAVQ128,
29198 IX86_BUILTIN_PSRAVQ256,
29199 IX86_BUILTIN_PSLLVV4DI_MASK,
29200 IX86_BUILTIN_PSLLVV2DI_MASK,
29201 IX86_BUILTIN_PSLLVV8SI_MASK,
29202 IX86_BUILTIN_PSLLVV4SI_MASK,
29203 IX86_BUILTIN_PSRAVV8SI_MASK,
29204 IX86_BUILTIN_PSRAVV4SI_MASK,
29205 IX86_BUILTIN_PSRLVV4DI_MASK,
29206 IX86_BUILTIN_PSRLVV2DI_MASK,
29207 IX86_BUILTIN_PSRLVV8SI_MASK,
29208 IX86_BUILTIN_PSRLVV4SI_MASK,
29209 IX86_BUILTIN_PSRAWI256_MASK,
29210 IX86_BUILTIN_PSRAW256_MASK,
29211 IX86_BUILTIN_PSRAWI128_MASK,
29212 IX86_BUILTIN_PSRAW128_MASK,
29213 IX86_BUILTIN_PSRLWI256_MASK,
29214 IX86_BUILTIN_PSRLW256_MASK,
29215 IX86_BUILTIN_PSRLWI128_MASK,
29216 IX86_BUILTIN_PSRLW128_MASK,
29217 IX86_BUILTIN_PRORVD256,
29218 IX86_BUILTIN_PROLVD256,
29219 IX86_BUILTIN_PRORD256,
29220 IX86_BUILTIN_PROLD256,
29221 IX86_BUILTIN_PRORVD128,
29222 IX86_BUILTIN_PROLVD128,
29223 IX86_BUILTIN_PRORD128,
29224 IX86_BUILTIN_PROLD128,
29225 IX86_BUILTIN_FPCLASSPD256,
29226 IX86_BUILTIN_FPCLASSPD128,
29227 IX86_BUILTIN_FPCLASSSD,
29228 IX86_BUILTIN_FPCLASSPS256,
29229 IX86_BUILTIN_FPCLASSPS128,
29230 IX86_BUILTIN_FPCLASSSS,
29231 IX86_BUILTIN_CVTB2MASK128,
29232 IX86_BUILTIN_CVTB2MASK256,
29233 IX86_BUILTIN_CVTW2MASK128,
29234 IX86_BUILTIN_CVTW2MASK256,
29235 IX86_BUILTIN_CVTD2MASK128,
29236 IX86_BUILTIN_CVTD2MASK256,
29237 IX86_BUILTIN_CVTQ2MASK128,
29238 IX86_BUILTIN_CVTQ2MASK256,
29239 IX86_BUILTIN_CVTMASK2B128,
29240 IX86_BUILTIN_CVTMASK2B256,
29241 IX86_BUILTIN_CVTMASK2W128,
29242 IX86_BUILTIN_CVTMASK2W256,
29243 IX86_BUILTIN_CVTMASK2D128,
29244 IX86_BUILTIN_CVTMASK2D256,
29245 IX86_BUILTIN_CVTMASK2Q128,
29246 IX86_BUILTIN_CVTMASK2Q256,
29247 IX86_BUILTIN_PCMPEQB128_MASK,
29248 IX86_BUILTIN_PCMPEQB256_MASK,
29249 IX86_BUILTIN_PCMPEQW128_MASK,
29250 IX86_BUILTIN_PCMPEQW256_MASK,
29251 IX86_BUILTIN_PCMPEQD128_MASK,
29252 IX86_BUILTIN_PCMPEQD256_MASK,
29253 IX86_BUILTIN_PCMPEQQ128_MASK,
29254 IX86_BUILTIN_PCMPEQQ256_MASK,
29255 IX86_BUILTIN_PCMPGTB128_MASK,
29256 IX86_BUILTIN_PCMPGTB256_MASK,
29257 IX86_BUILTIN_PCMPGTW128_MASK,
29258 IX86_BUILTIN_PCMPGTW256_MASK,
29259 IX86_BUILTIN_PCMPGTD128_MASK,
29260 IX86_BUILTIN_PCMPGTD256_MASK,
29261 IX86_BUILTIN_PCMPGTQ128_MASK,
29262 IX86_BUILTIN_PCMPGTQ256_MASK,
29263 IX86_BUILTIN_PTESTMB128,
29264 IX86_BUILTIN_PTESTMB256,
29265 IX86_BUILTIN_PTESTMW128,
29266 IX86_BUILTIN_PTESTMW256,
29267 IX86_BUILTIN_PTESTMD128,
29268 IX86_BUILTIN_PTESTMD256,
29269 IX86_BUILTIN_PTESTMQ128,
29270 IX86_BUILTIN_PTESTMQ256,
29271 IX86_BUILTIN_PTESTNMB128,
29272 IX86_BUILTIN_PTESTNMB256,
29273 IX86_BUILTIN_PTESTNMW128,
29274 IX86_BUILTIN_PTESTNMW256,
29275 IX86_BUILTIN_PTESTNMD128,
29276 IX86_BUILTIN_PTESTNMD256,
29277 IX86_BUILTIN_PTESTNMQ128,
29278 IX86_BUILTIN_PTESTNMQ256,
29279 IX86_BUILTIN_PBROADCASTMB128,
29280 IX86_BUILTIN_PBROADCASTMB256,
29281 IX86_BUILTIN_PBROADCASTMW128,
29282 IX86_BUILTIN_PBROADCASTMW256,
29283 IX86_BUILTIN_COMPRESSPD256,
29284 IX86_BUILTIN_COMPRESSPD128,
29285 IX86_BUILTIN_COMPRESSPS256,
29286 IX86_BUILTIN_COMPRESSPS128,
29287 IX86_BUILTIN_PCOMPRESSQ256,
29288 IX86_BUILTIN_PCOMPRESSQ128,
29289 IX86_BUILTIN_PCOMPRESSD256,
29290 IX86_BUILTIN_PCOMPRESSD128,
29291 IX86_BUILTIN_EXPANDPD256,
29292 IX86_BUILTIN_EXPANDPD128,
29293 IX86_BUILTIN_EXPANDPS256,
29294 IX86_BUILTIN_EXPANDPS128,
29295 IX86_BUILTIN_PEXPANDQ256,
29296 IX86_BUILTIN_PEXPANDQ128,
29297 IX86_BUILTIN_PEXPANDD256,
29298 IX86_BUILTIN_PEXPANDD128,
29299 IX86_BUILTIN_EXPANDPD256Z,
29300 IX86_BUILTIN_EXPANDPD128Z,
29301 IX86_BUILTIN_EXPANDPS256Z,
29302 IX86_BUILTIN_EXPANDPS128Z,
29303 IX86_BUILTIN_PEXPANDQ256Z,
29304 IX86_BUILTIN_PEXPANDQ128Z,
29305 IX86_BUILTIN_PEXPANDD256Z,
29306 IX86_BUILTIN_PEXPANDD128Z,
29307 IX86_BUILTIN_PMAXSD256_MASK,
29308 IX86_BUILTIN_PMINSD256_MASK,
29309 IX86_BUILTIN_PMAXUD256_MASK,
29310 IX86_BUILTIN_PMINUD256_MASK,
29311 IX86_BUILTIN_PMAXSD128_MASK,
29312 IX86_BUILTIN_PMINSD128_MASK,
29313 IX86_BUILTIN_PMAXUD128_MASK,
29314 IX86_BUILTIN_PMINUD128_MASK,
29315 IX86_BUILTIN_PMAXSQ256_MASK,
29316 IX86_BUILTIN_PMINSQ256_MASK,
29317 IX86_BUILTIN_PMAXUQ256_MASK,
29318 IX86_BUILTIN_PMINUQ256_MASK,
29319 IX86_BUILTIN_PMAXSQ128_MASK,
29320 IX86_BUILTIN_PMINSQ128_MASK,
29321 IX86_BUILTIN_PMAXUQ128_MASK,
29322 IX86_BUILTIN_PMINUQ128_MASK,
29323 IX86_BUILTIN_PMINSB256_MASK,
29324 IX86_BUILTIN_PMINUB256_MASK,
29325 IX86_BUILTIN_PMAXSB256_MASK,
29326 IX86_BUILTIN_PMAXUB256_MASK,
29327 IX86_BUILTIN_PMINSB128_MASK,
29328 IX86_BUILTIN_PMINUB128_MASK,
29329 IX86_BUILTIN_PMAXSB128_MASK,
29330 IX86_BUILTIN_PMAXUB128_MASK,
29331 IX86_BUILTIN_PMINSW256_MASK,
29332 IX86_BUILTIN_PMINUW256_MASK,
29333 IX86_BUILTIN_PMAXSW256_MASK,
29334 IX86_BUILTIN_PMAXUW256_MASK,
29335 IX86_BUILTIN_PMINSW128_MASK,
29336 IX86_BUILTIN_PMINUW128_MASK,
29337 IX86_BUILTIN_PMAXSW128_MASK,
29338 IX86_BUILTIN_PMAXUW128_MASK,
29339 IX86_BUILTIN_VPCONFLICTQ256,
29340 IX86_BUILTIN_VPCONFLICTD256,
29341 IX86_BUILTIN_VPCLZCNTQ256,
29342 IX86_BUILTIN_VPCLZCNTD256,
29343 IX86_BUILTIN_UNPCKHPD256_MASK,
29344 IX86_BUILTIN_UNPCKHPD128_MASK,
29345 IX86_BUILTIN_UNPCKHPS256_MASK,
29346 IX86_BUILTIN_UNPCKHPS128_MASK,
29347 IX86_BUILTIN_UNPCKLPD256_MASK,
29348 IX86_BUILTIN_UNPCKLPD128_MASK,
29349 IX86_BUILTIN_UNPCKLPS256_MASK,
29350 IX86_BUILTIN_VPCONFLICTQ128,
29351 IX86_BUILTIN_VPCONFLICTD128,
29352 IX86_BUILTIN_VPCLZCNTQ128,
29353 IX86_BUILTIN_VPCLZCNTD128,
29354 IX86_BUILTIN_UNPCKLPS128_MASK,
29355 IX86_BUILTIN_ALIGND256,
29356 IX86_BUILTIN_ALIGNQ256,
29357 IX86_BUILTIN_ALIGND128,
29358 IX86_BUILTIN_ALIGNQ128,
29359 IX86_BUILTIN_CVTPS2PH256_MASK,
29360 IX86_BUILTIN_CVTPS2PH_MASK,
29361 IX86_BUILTIN_CVTPH2PS_MASK,
29362 IX86_BUILTIN_CVTPH2PS256_MASK,
29363 IX86_BUILTIN_PUNPCKHDQ128_MASK,
29364 IX86_BUILTIN_PUNPCKHDQ256_MASK,
29365 IX86_BUILTIN_PUNPCKHQDQ128_MASK,
29366 IX86_BUILTIN_PUNPCKHQDQ256_MASK,
29367 IX86_BUILTIN_PUNPCKLDQ128_MASK,
29368 IX86_BUILTIN_PUNPCKLDQ256_MASK,
29369 IX86_BUILTIN_PUNPCKLQDQ128_MASK,
29370 IX86_BUILTIN_PUNPCKLQDQ256_MASK,
29371 IX86_BUILTIN_PUNPCKHBW128_MASK,
29372 IX86_BUILTIN_PUNPCKHBW256_MASK,
29373 IX86_BUILTIN_PUNPCKHWD128_MASK,
29374 IX86_BUILTIN_PUNPCKHWD256_MASK,
29375 IX86_BUILTIN_PUNPCKLBW128_MASK,
29376 IX86_BUILTIN_PUNPCKLBW256_MASK,
29377 IX86_BUILTIN_PUNPCKLWD128_MASK,
29378 IX86_BUILTIN_PUNPCKLWD256_MASK,
29379 IX86_BUILTIN_PSLLVV16HI,
29380 IX86_BUILTIN_PSLLVV8HI,
29381 IX86_BUILTIN_PACKSSDW256_MASK,
29382 IX86_BUILTIN_PACKSSDW128_MASK,
29383 IX86_BUILTIN_PACKUSDW256_MASK,
29384 IX86_BUILTIN_PACKUSDW128_MASK,
29385 IX86_BUILTIN_PAVGB256_MASK,
29386 IX86_BUILTIN_PAVGW256_MASK,
29387 IX86_BUILTIN_PAVGB128_MASK,
29388 IX86_BUILTIN_PAVGW128_MASK,
29389 IX86_BUILTIN_VPERMVARSF256_MASK,
29390 IX86_BUILTIN_VPERMVARDF256_MASK,
29391 IX86_BUILTIN_VPERMDF256_MASK,
29392 IX86_BUILTIN_PABSB256_MASK,
29393 IX86_BUILTIN_PABSB128_MASK,
29394 IX86_BUILTIN_PABSW256_MASK,
29395 IX86_BUILTIN_PABSW128_MASK,
29396 IX86_BUILTIN_VPERMILVARPD_MASK,
29397 IX86_BUILTIN_VPERMILVARPS_MASK,
29398 IX86_BUILTIN_VPERMILVARPD256_MASK,
29399 IX86_BUILTIN_VPERMILVARPS256_MASK,
29400 IX86_BUILTIN_VPERMILPD_MASK,
29401 IX86_BUILTIN_VPERMILPS_MASK,
29402 IX86_BUILTIN_VPERMILPD256_MASK,
29403 IX86_BUILTIN_VPERMILPS256_MASK,
29404 IX86_BUILTIN_BLENDMQ256,
29405 IX86_BUILTIN_BLENDMD256,
29406 IX86_BUILTIN_BLENDMPD256,
29407 IX86_BUILTIN_BLENDMPS256,
29408 IX86_BUILTIN_BLENDMQ128,
29409 IX86_BUILTIN_BLENDMD128,
29410 IX86_BUILTIN_BLENDMPD128,
29411 IX86_BUILTIN_BLENDMPS128,
29412 IX86_BUILTIN_BLENDMW256,
29413 IX86_BUILTIN_BLENDMB256,
29414 IX86_BUILTIN_BLENDMW128,
29415 IX86_BUILTIN_BLENDMB128,
29416 IX86_BUILTIN_PMULLD256_MASK,
29417 IX86_BUILTIN_PMULLD128_MASK,
29418 IX86_BUILTIN_PMULUDQ256_MASK,
29419 IX86_BUILTIN_PMULDQ256_MASK,
29420 IX86_BUILTIN_PMULDQ128_MASK,
29421 IX86_BUILTIN_PMULUDQ128_MASK,
29422 IX86_BUILTIN_CVTPD2PS256_MASK,
29423 IX86_BUILTIN_CVTPD2PS_MASK,
29424 IX86_BUILTIN_VPERMVARSI256_MASK,
29425 IX86_BUILTIN_VPERMVARDI256_MASK,
29426 IX86_BUILTIN_VPERMDI256_MASK,
29427 IX86_BUILTIN_CMPQ256,
29428 IX86_BUILTIN_CMPD256,
29429 IX86_BUILTIN_UCMPQ256,
29430 IX86_BUILTIN_UCMPD256,
29431 IX86_BUILTIN_CMPB256,
29432 IX86_BUILTIN_CMPW256,
29433 IX86_BUILTIN_UCMPB256,
29434 IX86_BUILTIN_UCMPW256,
29435 IX86_BUILTIN_CMPPD256_MASK,
29436 IX86_BUILTIN_CMPPS256_MASK,
29437 IX86_BUILTIN_CMPQ128,
29438 IX86_BUILTIN_CMPD128,
29439 IX86_BUILTIN_UCMPQ128,
29440 IX86_BUILTIN_UCMPD128,
29441 IX86_BUILTIN_CMPB128,
29442 IX86_BUILTIN_CMPW128,
29443 IX86_BUILTIN_UCMPB128,
29444 IX86_BUILTIN_UCMPW128,
29445 IX86_BUILTIN_CMPPD128_MASK,
29446 IX86_BUILTIN_CMPPS128_MASK,
29448 IX86_BUILTIN_GATHER3SIV8SF,
29449 IX86_BUILTIN_GATHER3SIV4SF,
29450 IX86_BUILTIN_GATHER3SIV4DF,
29451 IX86_BUILTIN_GATHER3SIV2DF,
29452 IX86_BUILTIN_GATHER3DIV8SF,
29453 IX86_BUILTIN_GATHER3DIV4SF,
29454 IX86_BUILTIN_GATHER3DIV4DF,
29455 IX86_BUILTIN_GATHER3DIV2DF,
29456 IX86_BUILTIN_GATHER3SIV8SI,
29457 IX86_BUILTIN_GATHER3SIV4SI,
29458 IX86_BUILTIN_GATHER3SIV4DI,
29459 IX86_BUILTIN_GATHER3SIV2DI,
29460 IX86_BUILTIN_GATHER3DIV8SI,
29461 IX86_BUILTIN_GATHER3DIV4SI,
29462 IX86_BUILTIN_GATHER3DIV4DI,
29463 IX86_BUILTIN_GATHER3DIV2DI,
29464 IX86_BUILTIN_SCATTERSIV8SF,
29465 IX86_BUILTIN_SCATTERSIV4SF,
29466 IX86_BUILTIN_SCATTERSIV4DF,
29467 IX86_BUILTIN_SCATTERSIV2DF,
29468 IX86_BUILTIN_SCATTERDIV8SF,
29469 IX86_BUILTIN_SCATTERDIV4SF,
29470 IX86_BUILTIN_SCATTERDIV4DF,
29471 IX86_BUILTIN_SCATTERDIV2DF,
29472 IX86_BUILTIN_SCATTERSIV8SI,
29473 IX86_BUILTIN_SCATTERSIV4SI,
29474 IX86_BUILTIN_SCATTERSIV4DI,
29475 IX86_BUILTIN_SCATTERSIV2DI,
29476 IX86_BUILTIN_SCATTERDIV8SI,
29477 IX86_BUILTIN_SCATTERDIV4SI,
29478 IX86_BUILTIN_SCATTERDIV4DI,
29479 IX86_BUILTIN_SCATTERDIV2DI,
29481 /* AVX512DQ. */
29482 IX86_BUILTIN_RANGESD128,
29483 IX86_BUILTIN_RANGESS128,
29484 IX86_BUILTIN_KUNPCKWD,
29485 IX86_BUILTIN_KUNPCKDQ,
29486 IX86_BUILTIN_BROADCASTF32x2_512,
29487 IX86_BUILTIN_BROADCASTI32x2_512,
29488 IX86_BUILTIN_BROADCASTF64X2_512,
29489 IX86_BUILTIN_BROADCASTI64X2_512,
29490 IX86_BUILTIN_BROADCASTF32X8_512,
29491 IX86_BUILTIN_BROADCASTI32X8_512,
29492 IX86_BUILTIN_EXTRACTF64X2_512,
29493 IX86_BUILTIN_EXTRACTF32X8,
29494 IX86_BUILTIN_EXTRACTI64X2_512,
29495 IX86_BUILTIN_EXTRACTI32X8,
29496 IX86_BUILTIN_REDUCEPD512_MASK,
29497 IX86_BUILTIN_REDUCEPS512_MASK,
29498 IX86_BUILTIN_PMULLQ512,
29499 IX86_BUILTIN_XORPD512,
29500 IX86_BUILTIN_XORPS512,
29501 IX86_BUILTIN_ORPD512,
29502 IX86_BUILTIN_ORPS512,
29503 IX86_BUILTIN_ANDPD512,
29504 IX86_BUILTIN_ANDPS512,
29505 IX86_BUILTIN_ANDNPD512,
29506 IX86_BUILTIN_ANDNPS512,
29507 IX86_BUILTIN_INSERTF32X8,
29508 IX86_BUILTIN_INSERTI32X8,
29509 IX86_BUILTIN_INSERTF64X2_512,
29510 IX86_BUILTIN_INSERTI64X2_512,
29511 IX86_BUILTIN_FPCLASSPD512,
29512 IX86_BUILTIN_FPCLASSPS512,
29513 IX86_BUILTIN_CVTD2MASK512,
29514 IX86_BUILTIN_CVTQ2MASK512,
29515 IX86_BUILTIN_CVTMASK2D512,
29516 IX86_BUILTIN_CVTMASK2Q512,
29517 IX86_BUILTIN_CVTPD2QQ512,
29518 IX86_BUILTIN_CVTPS2QQ512,
29519 IX86_BUILTIN_CVTPD2UQQ512,
29520 IX86_BUILTIN_CVTPS2UQQ512,
29521 IX86_BUILTIN_CVTQQ2PS512,
29522 IX86_BUILTIN_CVTUQQ2PS512,
29523 IX86_BUILTIN_CVTQQ2PD512,
29524 IX86_BUILTIN_CVTUQQ2PD512,
29525 IX86_BUILTIN_CVTTPS2QQ512,
29526 IX86_BUILTIN_CVTTPS2UQQ512,
29527 IX86_BUILTIN_CVTTPD2QQ512,
29528 IX86_BUILTIN_CVTTPD2UQQ512,
29529 IX86_BUILTIN_RANGEPS512,
29530 IX86_BUILTIN_RANGEPD512,
29532 /* AVX512BW. */
29533 IX86_BUILTIN_PACKUSDW512,
29534 IX86_BUILTIN_PACKSSDW512,
29535 IX86_BUILTIN_LOADDQUHI512_MASK,
29536 IX86_BUILTIN_LOADDQUQI512_MASK,
29537 IX86_BUILTIN_PSLLDQ512,
29538 IX86_BUILTIN_PSRLDQ512,
29539 IX86_BUILTIN_STOREDQUHI512_MASK,
29540 IX86_BUILTIN_STOREDQUQI512_MASK,
29541 IX86_BUILTIN_PALIGNR512,
29542 IX86_BUILTIN_PALIGNR512_MASK,
29543 IX86_BUILTIN_MOVDQUHI512_MASK,
29544 IX86_BUILTIN_MOVDQUQI512_MASK,
29545 IX86_BUILTIN_PSADBW512,
29546 IX86_BUILTIN_DBPSADBW512,
29547 IX86_BUILTIN_PBROADCASTB512,
29548 IX86_BUILTIN_PBROADCASTB512_GPR,
29549 IX86_BUILTIN_PBROADCASTW512,
29550 IX86_BUILTIN_PBROADCASTW512_GPR,
29551 IX86_BUILTIN_PMOVSXBW512_MASK,
29552 IX86_BUILTIN_PMOVZXBW512_MASK,
29553 IX86_BUILTIN_VPERMVARHI512_MASK,
29554 IX86_BUILTIN_VPERMT2VARHI512,
29555 IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
29556 IX86_BUILTIN_VPERMI2VARHI512,
29557 IX86_BUILTIN_PAVGB512,
29558 IX86_BUILTIN_PAVGW512,
29559 IX86_BUILTIN_PADDB512,
29560 IX86_BUILTIN_PSUBB512,
29561 IX86_BUILTIN_PSUBSB512,
29562 IX86_BUILTIN_PADDSB512,
29563 IX86_BUILTIN_PSUBUSB512,
29564 IX86_BUILTIN_PADDUSB512,
29565 IX86_BUILTIN_PSUBW512,
29566 IX86_BUILTIN_PADDW512,
29567 IX86_BUILTIN_PSUBSW512,
29568 IX86_BUILTIN_PADDSW512,
29569 IX86_BUILTIN_PSUBUSW512,
29570 IX86_BUILTIN_PADDUSW512,
29571 IX86_BUILTIN_PMAXUW512,
29572 IX86_BUILTIN_PMAXSW512,
29573 IX86_BUILTIN_PMINUW512,
29574 IX86_BUILTIN_PMINSW512,
29575 IX86_BUILTIN_PMAXUB512,
29576 IX86_BUILTIN_PMAXSB512,
29577 IX86_BUILTIN_PMINUB512,
29578 IX86_BUILTIN_PMINSB512,
29579 IX86_BUILTIN_PMOVWB512,
29580 IX86_BUILTIN_PMOVSWB512,
29581 IX86_BUILTIN_PMOVUSWB512,
29582 IX86_BUILTIN_PMULHRSW512_MASK,
29583 IX86_BUILTIN_PMULHUW512_MASK,
29584 IX86_BUILTIN_PMULHW512_MASK,
29585 IX86_BUILTIN_PMULLW512_MASK,
29586 IX86_BUILTIN_PSLLWI512_MASK,
29587 IX86_BUILTIN_PSLLW512_MASK,
29588 IX86_BUILTIN_PACKSSWB512,
29589 IX86_BUILTIN_PACKUSWB512,
29590 IX86_BUILTIN_PSRAVV32HI,
29591 IX86_BUILTIN_PMADDUBSW512_MASK,
29592 IX86_BUILTIN_PMADDWD512_MASK,
29593 IX86_BUILTIN_PSRLVV32HI,
29594 IX86_BUILTIN_PUNPCKHBW512,
29595 IX86_BUILTIN_PUNPCKHWD512,
29596 IX86_BUILTIN_PUNPCKLBW512,
29597 IX86_BUILTIN_PUNPCKLWD512,
29598 IX86_BUILTIN_PSHUFB512,
29599 IX86_BUILTIN_PSHUFHW512,
29600 IX86_BUILTIN_PSHUFLW512,
29601 IX86_BUILTIN_PSRAWI512,
29602 IX86_BUILTIN_PSRAW512,
29603 IX86_BUILTIN_PSRLWI512,
29604 IX86_BUILTIN_PSRLW512,
29605 IX86_BUILTIN_CVTB2MASK512,
29606 IX86_BUILTIN_CVTW2MASK512,
29607 IX86_BUILTIN_CVTMASK2B512,
29608 IX86_BUILTIN_CVTMASK2W512,
29609 IX86_BUILTIN_PCMPEQB512_MASK,
29610 IX86_BUILTIN_PCMPEQW512_MASK,
29611 IX86_BUILTIN_PCMPGTB512_MASK,
29612 IX86_BUILTIN_PCMPGTW512_MASK,
29613 IX86_BUILTIN_PTESTMB512,
29614 IX86_BUILTIN_PTESTMW512,
29615 IX86_BUILTIN_PTESTNMB512,
29616 IX86_BUILTIN_PTESTNMW512,
29617 IX86_BUILTIN_PSLLVV32HI,
29618 IX86_BUILTIN_PABSB512,
29619 IX86_BUILTIN_PABSW512,
29620 IX86_BUILTIN_BLENDMW512,
29621 IX86_BUILTIN_BLENDMB512,
29622 IX86_BUILTIN_CMPB512,
29623 IX86_BUILTIN_CMPW512,
29624 IX86_BUILTIN_UCMPB512,
29625 IX86_BUILTIN_UCMPW512,
29627 /* Alternate 4 and 8 element gather/scatter for the vectorizer
29628 where all operands are 32-byte or 64-byte wide respectively. */
29629 IX86_BUILTIN_GATHERALTSIV4DF,
29630 IX86_BUILTIN_GATHERALTDIV8SF,
29631 IX86_BUILTIN_GATHERALTSIV4DI,
29632 IX86_BUILTIN_GATHERALTDIV8SI,
29633 IX86_BUILTIN_GATHER3ALTDIV16SF,
29634 IX86_BUILTIN_GATHER3ALTDIV16SI,
29635 IX86_BUILTIN_GATHER3ALTSIV4DF,
29636 IX86_BUILTIN_GATHER3ALTDIV8SF,
29637 IX86_BUILTIN_GATHER3ALTSIV4DI,
29638 IX86_BUILTIN_GATHER3ALTDIV8SI,
29639 IX86_BUILTIN_GATHER3ALTSIV8DF,
29640 IX86_BUILTIN_GATHER3ALTSIV8DI,
29641 IX86_BUILTIN_GATHER3DIV16SF,
29642 IX86_BUILTIN_GATHER3DIV16SI,
29643 IX86_BUILTIN_GATHER3DIV8DF,
29644 IX86_BUILTIN_GATHER3DIV8DI,
29645 IX86_BUILTIN_GATHER3SIV16SF,
29646 IX86_BUILTIN_GATHER3SIV16SI,
29647 IX86_BUILTIN_GATHER3SIV8DF,
29648 IX86_BUILTIN_GATHER3SIV8DI,
29649 IX86_BUILTIN_SCATTERDIV16SF,
29650 IX86_BUILTIN_SCATTERDIV16SI,
29651 IX86_BUILTIN_SCATTERDIV8DF,
29652 IX86_BUILTIN_SCATTERDIV8DI,
29653 IX86_BUILTIN_SCATTERSIV16SF,
29654 IX86_BUILTIN_SCATTERSIV16SI,
29655 IX86_BUILTIN_SCATTERSIV8DF,
29656 IX86_BUILTIN_SCATTERSIV8DI,
29658 /* AVX512PF. */
29659 IX86_BUILTIN_GATHERPFQPD,
29660 IX86_BUILTIN_GATHERPFDPS,
29661 IX86_BUILTIN_GATHERPFDPD,
29662 IX86_BUILTIN_GATHERPFQPS,
29663 IX86_BUILTIN_SCATTERPFDPD,
29664 IX86_BUILTIN_SCATTERPFDPS,
29665 IX86_BUILTIN_SCATTERPFQPD,
29666 IX86_BUILTIN_SCATTERPFQPS,
29668 /* AVX512ER. */
29669 IX86_BUILTIN_EXP2PD_MASK,
29670 IX86_BUILTIN_EXP2PS_MASK,
29671 IX86_BUILTIN_EXP2PS,
29672 IX86_BUILTIN_RCP28PD,
29673 IX86_BUILTIN_RCP28PS,
29674 IX86_BUILTIN_RCP28SD,
29675 IX86_BUILTIN_RCP28SS,
29676 IX86_BUILTIN_RSQRT28PD,
29677 IX86_BUILTIN_RSQRT28PS,
29678 IX86_BUILTIN_RSQRT28SD,
29679 IX86_BUILTIN_RSQRT28SS,
29681 /* SHA builtins. */
29682 IX86_BUILTIN_SHA1MSG1,
29683 IX86_BUILTIN_SHA1MSG2,
29684 IX86_BUILTIN_SHA1NEXTE,
29685 IX86_BUILTIN_SHA1RNDS4,
29686 IX86_BUILTIN_SHA256MSG1,
29687 IX86_BUILTIN_SHA256MSG2,
29688 IX86_BUILTIN_SHA256RNDS2,
29690 /* CLFLUSHOPT instructions. */
29691 IX86_BUILTIN_CLFLUSHOPT,
29693 /* TFmode support builtins. */
29694 IX86_BUILTIN_INFQ,
29695 IX86_BUILTIN_HUGE_VALQ,
29696 IX86_BUILTIN_FABSQ,
29697 IX86_BUILTIN_COPYSIGNQ,
29699 /* Vectorizer support builtins. */
29700 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
29701 IX86_BUILTIN_CPYSGNPS,
29702 IX86_BUILTIN_CPYSGNPD,
29703 IX86_BUILTIN_CPYSGNPS256,
29704 IX86_BUILTIN_CPYSGNPS512,
29705 IX86_BUILTIN_CPYSGNPD256,
29706 IX86_BUILTIN_CPYSGNPD512,
29707 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
29708 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
29711 /* FMA4 instructions. */
29712 IX86_BUILTIN_VFMADDSS,
29713 IX86_BUILTIN_VFMADDSD,
29714 IX86_BUILTIN_VFMADDPS,
29715 IX86_BUILTIN_VFMADDPD,
29716 IX86_BUILTIN_VFMADDPS256,
29717 IX86_BUILTIN_VFMADDPD256,
29718 IX86_BUILTIN_VFMADDSUBPS,
29719 IX86_BUILTIN_VFMADDSUBPD,
29720 IX86_BUILTIN_VFMADDSUBPS256,
29721 IX86_BUILTIN_VFMADDSUBPD256,
29723 /* FMA3 instructions. */
29724 IX86_BUILTIN_VFMADDSS3,
29725 IX86_BUILTIN_VFMADDSD3,
29727 /* XOP instructions. */
29728 IX86_BUILTIN_VPCMOV,
29729 IX86_BUILTIN_VPCMOV_V2DI,
29730 IX86_BUILTIN_VPCMOV_V4SI,
29731 IX86_BUILTIN_VPCMOV_V8HI,
29732 IX86_BUILTIN_VPCMOV_V16QI,
29733 IX86_BUILTIN_VPCMOV_V4SF,
29734 IX86_BUILTIN_VPCMOV_V2DF,
29735 IX86_BUILTIN_VPCMOV256,
29736 IX86_BUILTIN_VPCMOV_V4DI256,
29737 IX86_BUILTIN_VPCMOV_V8SI256,
29738 IX86_BUILTIN_VPCMOV_V16HI256,
29739 IX86_BUILTIN_VPCMOV_V32QI256,
29740 IX86_BUILTIN_VPCMOV_V8SF256,
29741 IX86_BUILTIN_VPCMOV_V4DF256,
29743 IX86_BUILTIN_VPPERM,
29745 IX86_BUILTIN_VPMACSSWW,
29746 IX86_BUILTIN_VPMACSWW,
29747 IX86_BUILTIN_VPMACSSWD,
29748 IX86_BUILTIN_VPMACSWD,
29749 IX86_BUILTIN_VPMACSSDD,
29750 IX86_BUILTIN_VPMACSDD,
29751 IX86_BUILTIN_VPMACSSDQL,
29752 IX86_BUILTIN_VPMACSSDQH,
29753 IX86_BUILTIN_VPMACSDQL,
29754 IX86_BUILTIN_VPMACSDQH,
29755 IX86_BUILTIN_VPMADCSSWD,
29756 IX86_BUILTIN_VPMADCSWD,
29758 IX86_BUILTIN_VPHADDBW,
29759 IX86_BUILTIN_VPHADDBD,
29760 IX86_BUILTIN_VPHADDBQ,
29761 IX86_BUILTIN_VPHADDWD,
29762 IX86_BUILTIN_VPHADDWQ,
29763 IX86_BUILTIN_VPHADDDQ,
29764 IX86_BUILTIN_VPHADDUBW,
29765 IX86_BUILTIN_VPHADDUBD,
29766 IX86_BUILTIN_VPHADDUBQ,
29767 IX86_BUILTIN_VPHADDUWD,
29768 IX86_BUILTIN_VPHADDUWQ,
29769 IX86_BUILTIN_VPHADDUDQ,
29770 IX86_BUILTIN_VPHSUBBW,
29771 IX86_BUILTIN_VPHSUBWD,
29772 IX86_BUILTIN_VPHSUBDQ,
29774 IX86_BUILTIN_VPROTB,
29775 IX86_BUILTIN_VPROTW,
29776 IX86_BUILTIN_VPROTD,
29777 IX86_BUILTIN_VPROTQ,
29778 IX86_BUILTIN_VPROTB_IMM,
29779 IX86_BUILTIN_VPROTW_IMM,
29780 IX86_BUILTIN_VPROTD_IMM,
29781 IX86_BUILTIN_VPROTQ_IMM,
29783 IX86_BUILTIN_VPSHLB,
29784 IX86_BUILTIN_VPSHLW,
29785 IX86_BUILTIN_VPSHLD,
29786 IX86_BUILTIN_VPSHLQ,
29787 IX86_BUILTIN_VPSHAB,
29788 IX86_BUILTIN_VPSHAW,
29789 IX86_BUILTIN_VPSHAD,
29790 IX86_BUILTIN_VPSHAQ,
29792 IX86_BUILTIN_VFRCZSS,
29793 IX86_BUILTIN_VFRCZSD,
29794 IX86_BUILTIN_VFRCZPS,
29795 IX86_BUILTIN_VFRCZPD,
29796 IX86_BUILTIN_VFRCZPS256,
29797 IX86_BUILTIN_VFRCZPD256,
29799 IX86_BUILTIN_VPCOMEQUB,
29800 IX86_BUILTIN_VPCOMNEUB,
29801 IX86_BUILTIN_VPCOMLTUB,
29802 IX86_BUILTIN_VPCOMLEUB,
29803 IX86_BUILTIN_VPCOMGTUB,
29804 IX86_BUILTIN_VPCOMGEUB,
29805 IX86_BUILTIN_VPCOMFALSEUB,
29806 IX86_BUILTIN_VPCOMTRUEUB,
29808 IX86_BUILTIN_VPCOMEQUW,
29809 IX86_BUILTIN_VPCOMNEUW,
29810 IX86_BUILTIN_VPCOMLTUW,
29811 IX86_BUILTIN_VPCOMLEUW,
29812 IX86_BUILTIN_VPCOMGTUW,
29813 IX86_BUILTIN_VPCOMGEUW,
29814 IX86_BUILTIN_VPCOMFALSEUW,
29815 IX86_BUILTIN_VPCOMTRUEUW,
29817 IX86_BUILTIN_VPCOMEQUD,
29818 IX86_BUILTIN_VPCOMNEUD,
29819 IX86_BUILTIN_VPCOMLTUD,
29820 IX86_BUILTIN_VPCOMLEUD,
29821 IX86_BUILTIN_VPCOMGTUD,
29822 IX86_BUILTIN_VPCOMGEUD,
29823 IX86_BUILTIN_VPCOMFALSEUD,
29824 IX86_BUILTIN_VPCOMTRUEUD,
29826 IX86_BUILTIN_VPCOMEQUQ,
29827 IX86_BUILTIN_VPCOMNEUQ,
29828 IX86_BUILTIN_VPCOMLTUQ,
29829 IX86_BUILTIN_VPCOMLEUQ,
29830 IX86_BUILTIN_VPCOMGTUQ,
29831 IX86_BUILTIN_VPCOMGEUQ,
29832 IX86_BUILTIN_VPCOMFALSEUQ,
29833 IX86_BUILTIN_VPCOMTRUEUQ,
29835 IX86_BUILTIN_VPCOMEQB,
29836 IX86_BUILTIN_VPCOMNEB,
29837 IX86_BUILTIN_VPCOMLTB,
29838 IX86_BUILTIN_VPCOMLEB,
29839 IX86_BUILTIN_VPCOMGTB,
29840 IX86_BUILTIN_VPCOMGEB,
29841 IX86_BUILTIN_VPCOMFALSEB,
29842 IX86_BUILTIN_VPCOMTRUEB,
29844 IX86_BUILTIN_VPCOMEQW,
29845 IX86_BUILTIN_VPCOMNEW,
29846 IX86_BUILTIN_VPCOMLTW,
29847 IX86_BUILTIN_VPCOMLEW,
29848 IX86_BUILTIN_VPCOMGTW,
29849 IX86_BUILTIN_VPCOMGEW,
29850 IX86_BUILTIN_VPCOMFALSEW,
29851 IX86_BUILTIN_VPCOMTRUEW,
29853 IX86_BUILTIN_VPCOMEQD,
29854 IX86_BUILTIN_VPCOMNED,
29855 IX86_BUILTIN_VPCOMLTD,
29856 IX86_BUILTIN_VPCOMLED,
29857 IX86_BUILTIN_VPCOMGTD,
29858 IX86_BUILTIN_VPCOMGED,
29859 IX86_BUILTIN_VPCOMFALSED,
29860 IX86_BUILTIN_VPCOMTRUED,
29862 IX86_BUILTIN_VPCOMEQQ,
29863 IX86_BUILTIN_VPCOMNEQ,
29864 IX86_BUILTIN_VPCOMLTQ,
29865 IX86_BUILTIN_VPCOMLEQ,
29866 IX86_BUILTIN_VPCOMGTQ,
29867 IX86_BUILTIN_VPCOMGEQ,
29868 IX86_BUILTIN_VPCOMFALSEQ,
29869 IX86_BUILTIN_VPCOMTRUEQ,
29871 /* LWP instructions. */
29872 IX86_BUILTIN_LLWPCB,
29873 IX86_BUILTIN_SLWPCB,
29874 IX86_BUILTIN_LWPVAL32,
29875 IX86_BUILTIN_LWPVAL64,
29876 IX86_BUILTIN_LWPINS32,
29877 IX86_BUILTIN_LWPINS64,
29879 IX86_BUILTIN_CLZS,
29881 /* RTM */
29882 IX86_BUILTIN_XBEGIN,
29883 IX86_BUILTIN_XEND,
29884 IX86_BUILTIN_XABORT,
29885 IX86_BUILTIN_XTEST,
29887 /* BMI instructions. */
29888 IX86_BUILTIN_BEXTR32,
29889 IX86_BUILTIN_BEXTR64,
29890 IX86_BUILTIN_CTZS,
29892 /* TBM instructions. */
29893 IX86_BUILTIN_BEXTRI32,
29894 IX86_BUILTIN_BEXTRI64,
29896 /* BMI2 instructions. */
29897 IX86_BUILTIN_BZHI32,
29898 IX86_BUILTIN_BZHI64,
29899 IX86_BUILTIN_PDEP32,
29900 IX86_BUILTIN_PDEP64,
29901 IX86_BUILTIN_PEXT32,
29902 IX86_BUILTIN_PEXT64,
29904 /* ADX instructions. */
29905 IX86_BUILTIN_ADDCARRYX32,
29906 IX86_BUILTIN_ADDCARRYX64,
29908 /* FSGSBASE instructions. */
29909 IX86_BUILTIN_RDFSBASE32,
29910 IX86_BUILTIN_RDFSBASE64,
29911 IX86_BUILTIN_RDGSBASE32,
29912 IX86_BUILTIN_RDGSBASE64,
29913 IX86_BUILTIN_WRFSBASE32,
29914 IX86_BUILTIN_WRFSBASE64,
29915 IX86_BUILTIN_WRGSBASE32,
29916 IX86_BUILTIN_WRGSBASE64,
29918 /* RDRND instructions. */
29919 IX86_BUILTIN_RDRAND16_STEP,
29920 IX86_BUILTIN_RDRAND32_STEP,
29921 IX86_BUILTIN_RDRAND64_STEP,
29923 /* RDSEED instructions. */
29924 IX86_BUILTIN_RDSEED16_STEP,
29925 IX86_BUILTIN_RDSEED32_STEP,
29926 IX86_BUILTIN_RDSEED64_STEP,
29928 /* F16C instructions. */
29929 IX86_BUILTIN_CVTPH2PS,
29930 IX86_BUILTIN_CVTPH2PS256,
29931 IX86_BUILTIN_CVTPS2PH,
29932 IX86_BUILTIN_CVTPS2PH256,
29934 /* CFString built-in for darwin */
29935 IX86_BUILTIN_CFSTRING,
29937 /* Builtins to get CPU type and supported features. */
29938 IX86_BUILTIN_CPU_INIT,
29939 IX86_BUILTIN_CPU_IS,
29940 IX86_BUILTIN_CPU_SUPPORTS,
29942 /* Read/write FLAGS register built-ins. */
29943 IX86_BUILTIN_READ_FLAGS,
29944 IX86_BUILTIN_WRITE_FLAGS,
29946 IX86_BUILTIN_MAX
29947 };
29949 /* Table for the ix86 builtin decls. */
29950 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
29952 /* Table of all of the builtin functions that are possible with different ISAs
29953 but are waiting to be built until a function is declared to use that
29954 ISA. */
29955 struct builtin_isa {
29956 const char *name; /* function name */
29957 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
29958 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
29959 bool const_p; /* true if the declaration is constant */
29960 bool set_and_not_built_p;
29961 };
29963 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
29966 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
29967 of isa_flags this builtin requires in the ix86_builtins_isa array. Store the
29968 function decl in the ix86_builtins array. Return the function decl, or
29969 NULL_TREE if the builtin was not added.
29971 If the front end has a special hook for builtin functions, delay adding
29972 builtin functions that aren't in the current ISA until the ISA is changed
29973 with function specific optimization. Doing so can save about 300K for the
29974 default compiler. When the builtin is expanded, check at that time whether
29975 it is valid.
29977 If the front end doesn't have a special hook, record all builtins, even
29978 those that aren't in the current ISA, in case the user uses function
29979 specific options for a different ISA; that way we don't get scope errors
29980 if a builtin is added in the middle of a function scope. */
29982 static inline tree
29983 def_builtin (HOST_WIDE_INT mask, const char *name,
29984 enum ix86_builtin_func_type tcode,
29985 enum ix86_builtins code)
29986 {
29987 tree decl = NULL_TREE;
29989 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
29990 {
29991 ix86_builtins_isa[(int) code].isa = mask;
29993 mask &= ~OPTION_MASK_ISA_64BIT;
29994 if (mask == 0
29995 || (mask & ix86_isa_flags) != 0
29996 || (lang_hooks.builtin_function
29997 == lang_hooks.builtin_function_ext_scope))
29999 {
30000 tree type = ix86_get_builtin_func_type (tcode);
30001 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30002 NULL, NULL_TREE);
30003 ix86_builtins[(int) code] = decl;
30004 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30005 }
30006 else
30007 {
30008 ix86_builtins[(int) code] = NULL_TREE;
30009 ix86_builtins_isa[(int) code].tcode = tcode;
30010 ix86_builtins_isa[(int) code].name = name;
30011 ix86_builtins_isa[(int) code].const_p = false;
30012 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30013 }
30014 }
30016 return decl;
30017 }
30019 /* Like def_builtin, but also marks the function decl "const". */
30021 static inline tree
30022 def_builtin_const (HOST_WIDE_INT mask, const char *name,
30023 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30024 {
30025 tree decl = def_builtin (mask, name, tcode, code);
30026 if (decl)
30027 TREE_READONLY (decl) = 1;
30028 else
30029 ix86_builtins_isa[(int) code].const_p = true;
30031 return decl;
30032 }
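/* Usage sketch (illustrative addition, not part of the original source; the
   mask, name, type code and enum value below are placeholders rather than a
   real registration taken from this file):

     tree decl
       = def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
                            UINT64_FTYPE_VOID, IX86_BUILTIN_RDTSC);

   If the requested ISA is already enabled (or the front end's extern-scope
   hook is in use), DECL is the freshly created builtin decl; otherwise it is
   NULL_TREE and the request is parked in ix86_builtins_isa until
   ix86_add_new_builtins is run for an ISA that covers the mask.  */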
30034 /* Add any new builtin functions for a given ISA that may not have been
30035 declared. This saves a bit of space compared to adding all of the
30036 declarations to the tree, even if we didn't use them. */
30038 static void
30039 ix86_add_new_builtins (HOST_WIDE_INT isa)
30040 {
30041 int i;
30043 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30044 {
30045 if ((ix86_builtins_isa[i].isa & isa) != 0
30046 && ix86_builtins_isa[i].set_and_not_built_p)
30047 {
30048 tree decl, type;
30050 /* Don't define the builtin again. */
30051 ix86_builtins_isa[i].set_and_not_built_p = false;
30053 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30054 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30055 type, i, BUILT_IN_MD, NULL,
30056 NULL_TREE);
30058 ix86_builtins[i] = decl;
30059 if (ix86_builtins_isa[i].const_p)
30060 TREE_READONLY (decl) = 1;
30061 }
30062 }
30063 }
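/* Illustrative note (added commentary, not original text): this is the other
   half of the deferral scheme set up by def_builtin.  The target option
   handling elsewhere in this file is expected to call, roughly,

     ix86_add_new_builtins (ix86_isa_flags);

   after something like __attribute__((target("avx512f"))) turns on extra
   ISA bits, so that every builtin recorded in ix86_builtins_isa for those
   bits finally receives a real decl at extern scope.  */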
30065 /* Bits for builtin_description.flag. */
30067 /* Set when we don't support the comparison natively, and should
30068 swap_comparison in order to support it. */
30069 #define BUILTIN_DESC_SWAP_OPERANDS 1
30071 struct builtin_description
30072 {
30073 const HOST_WIDE_INT mask;
30074 const enum insn_code icode;
30075 const char *const name;
30076 const enum ix86_builtins code;
30077 const enum rtx_code comparison;
30078 const int flag;
30079 };
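/* Illustrative sketch (added commentary, not original text): the bdesc_*
   tables that follow are data-driven registrations.  The builtin init code
   elsewhere in this file walks each table and feeds every row to
   def_builtin, along the lines of

     const struct builtin_description *d;
     size_t i;
     for (i = 0, d = bdesc_special_args;
          i < ARRAY_SIZE (bdesc_special_args); i++, d++)
       def_builtin (d->mask, d->name,
                    (enum ix86_builtin_func_type) d->flag, d->code);

   For tables such as bdesc_special_args the FLAG field carries the function
   type code; for bdesc_comi it instead carries comparison information.  The
   loop above is a sketch of that scheme, not a verbatim copy.  */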
30081 static const struct builtin_description bdesc_comi[] =
30082 {
30083 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30088 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30089 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30090 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30092 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30093 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30094 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30095 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30106 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30107 };
30109 static const struct builtin_description bdesc_pcmpestr[] =
30110 {
30111 /* SSE4.2 */
30112 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30113 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30114 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30115 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30116 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30117 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30118 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30119 };
30121 static const struct builtin_description bdesc_pcmpistr[] =
30122 {
30123 /* SSE4.2 */
30124 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30125 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30126 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30127 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30128 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30129 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30130 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30131 };
30133 /* Special builtins with variable number of arguments. */
30134 static const struct builtin_description bdesc_special_args[] =
30135 {
30136 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30137 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30138 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
30140 /* 80387 (for use internally for atomic compound assignment). */
30141 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
30142 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
30143 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
30144 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
30146 /* MMX */
30147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30149 /* 3DNow! */
30150 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
30152 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
30153 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
30154 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
30155 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30156 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30157 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30158 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30159 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30160 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30162 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30163 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
30164 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30165 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30166 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30167 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30168 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30169 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
30171 /* SSE */
30172 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30173 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30174 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30176 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30177 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
30178 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30179 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
30181 /* SSE or 3DNow!A */
30182 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30183 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
30185 /* SSE2 */
30186 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30187 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
30188 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30189 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
30190 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30191 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
30192 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
30193 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
30194 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
30195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
30200 /* SSE3 */
30201 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
30203 /* SSE4.1 */
30204 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
30206 /* SSE4A */
30207 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
30208 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
30210 /* AVX */
30211 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
30212 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
30214 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
30215 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30216 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30217 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
30218 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
30220 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
30221 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
30222 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30223 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30224 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30225 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
30226 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
30228 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
30229 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
30230 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
30232 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
30233 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
30234 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
30235 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
30236 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
30237 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
30238 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
30239 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
30241 /* AVX2 */
30242 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
30243 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
30244 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
30245 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
30246 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
30247 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
30248 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
30249 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
30250 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
30252 /* AVX512F */
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512dq_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
30301 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
30302 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
30303 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
30304 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
30305 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
30306 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
30308 /* FSGSBASE */
30309 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30310 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30311 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30312 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
30313 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30314 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30315 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
30316 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
30318 /* RTM */
30319 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
30320 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
30321 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
30323 /* AVX512BW */
30324 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
30325 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
30326 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
30327 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
30329 /* AVX512VL */
30330 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
30331 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
30332 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
30333 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
30334 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30335 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30336 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30337 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30338 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30339 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30340 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30341 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30342 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30343 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30344 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30345 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30346 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30347 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30348 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30349 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30350 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30351 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30352 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30353 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30354 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30355 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30356 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30357 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30358 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30359 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30360 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30361 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30362 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30363 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30364 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30365 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30366 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
30367 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
30368 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
30369 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
30370 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
30371 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
30372 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
30373 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
30374 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
30375 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
30376 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
30377 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
30378 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30379 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30380 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30381 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30382 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30383 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30384 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30385 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30386 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
30387 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
30388 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
30389 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
30390 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
30391 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
30392 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
30393 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
30394 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_store_mask, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30395 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_store_mask, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30396 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_store_mask, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30397 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_store_mask, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30398 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_store_mask, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
30399 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_store_mask, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
30400 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_store_mask, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30401 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_store_mask, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30402 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_store_mask, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30403 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_store_mask, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30404 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_store_mask, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
30405 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_store_mask, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
30406 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_store_mask, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30407 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_store_mask, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30408 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_store_mask, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30409 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_store_mask, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30410 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_store_mask, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
30411 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_store_mask, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
30412 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_store_mask, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30413 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_store_mask, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30414 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_store_mask, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30415 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_store_mask, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30416 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_store_mask, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
30417 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_store_mask, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
30418 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_store_mask, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30419 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_store_mask, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30420 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_store_mask, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30421 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_store_mask, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30422 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_store_mask, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
30423 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_store_mask, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
30424 };
30426 /* Builtins with variable number of arguments. */
30427 static const struct builtin_description bdesc_args[] =
30428 {
30429 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
30430 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
30431 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
30432 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30433 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30434 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
30435 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
30437 /* MMX */
30438 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30439 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30440 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30441 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30442 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30443 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30445 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30446 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30447 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30448 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30449 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30450 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30451 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30452 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30454 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30455 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30457 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30458 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30459 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30460 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30462 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30463 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30464 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30465 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30466 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30467 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30469 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30470 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30471 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30472 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30473 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
30474 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
30476 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30477 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
30478 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
30480 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
30482 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30483 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30484 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30485 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30486 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30487 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30489 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30490 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30491 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
30492 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30493 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30494 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
30496 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
30497 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
30498 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
30499 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
30501 /* 3DNow! */
30502 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
30503 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
30504 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30505 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30507 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30508 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30509 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30510 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30511 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30512 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
30513 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30514 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30515 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30516 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30517 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30518 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30519 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30520 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30521 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30523 /* 3DNow!A */
30524 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
30525 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
30526 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
30527 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
30528 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30529 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
30531 /* SSE */
30532 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
30533 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30534 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30535 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30536 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30537 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30538 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
30539 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
30540 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
30541 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
30542 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
30543 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
30545 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30547 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30548 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30549 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30550 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30551 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30552 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30553 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30554 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
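/* To relate these entries to the user-visible API: the intrinsics in GCC's
   <xmmintrin.h> are thin inline wrappers over these builtins.  Roughly,
   _mm_add_ps is
       extern __inline __m128
       _mm_add_ps (__m128 __A, __m128 __B)
       {
         return (__m128) __builtin_ia32_addps ((__v4sf) __A, (__v4sf) __B);
       }
   and the V4SF_FTYPE_V4SF_V4SF code above is what gives the builtin its
   (v4sf, v4sf) -> v4sf prototype.  */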
30556 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
30557 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
30558 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
30559 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30560 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30561 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30562 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
30563 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
30564 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
30565 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30566 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
30567 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30568 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
30569 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
30570 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
30571 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
30572 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
30573 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
30574 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
30575 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
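/* For the compare entries above, the fifth field is not UNKNOWN but the rtx
   comparison code handed to the sse_(vm)maskcmpv4sf3 pattern.  GT/GE have no
   direct form here, so cmpgtps/cmpgeps reuse LT/LE together with the _SWAP
   type, which exchanges the operands at expansion time, and the "not" forms
   (cmpnlt, cmpnle, cmpngt, cmpnge) map to the unordered codes UNGE/UNGT.  */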
30577 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30578 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30579 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30580 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30582 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30583 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30584 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30585 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30587 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30589 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30590 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30591 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30592 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30593 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30595 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
30596 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
30597 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
30599 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
30601 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
30602 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
30603 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
30605 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
30606 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
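/* The two entries above carry a zero name field: the table only supplies the
   insn code and signature, while the user-visible __builtin_fabsq and
   __builtin_copysignq names are registered separately by the FLOAT128
   builtin setup elsewhere in this file.  The AES and PCLMUL entries further
   down follow the same convention.  */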
30608 /* SSE MMX or 3DNow!A */
30609 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30610 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30611 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30613 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30614 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30615 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30616 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30618 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
30619 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
30621 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
30623 /* SSE2 */
30624 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30626 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
30627 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
30628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
30629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
30630 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
30632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
30633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
30634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
30635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
30636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
30638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
30640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
30641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
30642 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
30643 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
30645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
30646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
30647 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
30649 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30650 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30651 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30652 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
30659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
30660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
30661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
30662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
30663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
30664 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
30665 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
30666 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
30667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
30668 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
30669 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
30670 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
30671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
30672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
30673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
30674 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
30675 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
30676 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
30677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
30679 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30680 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30684 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30686 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30687 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30689 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30692 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30693 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30695 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
30697 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30698 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30699 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30700 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30701 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30702 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30703 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30704 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30713 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30715 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30716 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30718 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30719 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30720 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30721 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30723 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30726 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30727 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30728 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30729 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30730 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30731 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30733 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30734 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30735 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30736 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30738 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30739 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30740 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30741 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30742 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30743 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30744 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30745 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30747 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
30748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
30749 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
30751 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
30754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
30755 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
30757 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
30759 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
30760 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
30761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
30762 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
30764 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
30765 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
30766 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
30767 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
30768 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
30769 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
30770 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
30772 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
30773 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
30774 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
30775 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
30776 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
30777 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
30778 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
30780 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
30781 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
30782 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
30783 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
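/* Each SSE2 shift appears in two flavours above: the *_SI_COUNT entries take
   an integer shift count (the psllwi/pslldi/... forms), while the
   *_V*_COUNT entries take the count in the low part of a vector register.
   pslldqi128/psrldqi128 shift the whole register by bytes; their
   _INT_CONVERT type reflects that the V2DI operands are reinterpreted as
   V1TI so the ashl/lshr v1ti patterns can be used.  */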
30785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
30786 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
30787 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
30789 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
30791 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
30793 /* SSE2 MMX */
30794 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
30795 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
30797 /* SSE3 */
30798 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30799 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30801 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30802 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30803 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30804 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30805 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30806 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30808 /* SSSE3 */
30809 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
30810 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
30811 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
30812 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
30813 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
30814 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
30816 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30817 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30818 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30819 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30820 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30821 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30822 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30823 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30824 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30825 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30826 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30827 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30828 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
30829 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
30830 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30831 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30832 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30833 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30834 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30835 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
30836 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30837 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
30838 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30839 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
30841 /* SSSE3. */
30842 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
30843 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
30845 /* SSE4.1 */
30846 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30847 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30848 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
30849 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
30850 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30851 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30852 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30853 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
30854 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
30855 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
30857 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
30858 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
30859 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
30860 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
30861 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
30862 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
30863 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
30864 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
30865 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
30866 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
30867 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
30868 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
30869 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
30871 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
30872 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30873 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30874 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30875 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30876 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30877 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
30878 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30879 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30880 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
30881 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
30882 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30884 /* SSE4.1 */
30885 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
30886 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
30887 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30888 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30890 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
30891 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
30892 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
30893 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
30895 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
30896 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
30898 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
30899 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
30901 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
30902 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
30903 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
30904 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
30906 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
30907 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
30909 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30910 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
30912 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
30913 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
30914 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
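/* In the rounding entries above, ROUND_FLOOR/ROUND_CEIL/ROUND_TRUNC/
   ROUND_MXCSR sit in the slot normally used for an rtx comparison code; for
   the *_ROUND types that value is reused as the rounding-control immediate
   of the shared roundpd/roundps patterns, which is how floor, ceil, trunc
   and rint all map onto one insn.  The three ptest entries likewise share
   CODE_FOR_sse4_1_ptest, with EQ, LTU and GTU selecting the testz, testc
   and testnzc conditions respectively.  */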
30916 /* SSE4.2 */
30917 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30918 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
30919 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
30920 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30921 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30923 /* SSE4A */
30924 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
30925 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
30926 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
30927 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30929 /* AES */
30930 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
30931 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
30933 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30934 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30935 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30936 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30938 /* PCLMUL */
30939 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
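/* Like the FABSQ/COPYSIGNQ entries earlier, the AES and PCLMUL rows use a
   zero name and only OPTION_MASK_ISA_SSE2 here; the __builtin_ia32_aes* and
   __builtin_ia32_pclmulqdq128 names, together with the extra -maes/-mpclmul
   gating, are supplied where those builtins are declared elsewhere in this
   file.  */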
30941 /* AVX */
30942 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30943 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30944 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30945 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30946 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30947 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30948 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30949 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30950 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30951 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30952 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30953 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30954 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30955 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30956 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30957 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30958 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30959 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30960 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30961 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30962 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30963 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30964 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30965 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30966 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30967 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30969 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
30970 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
30971 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
30972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
30974 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
30975 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
30976 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
30977 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
30978 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
30979 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
30980 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
30981 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30983 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30984 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30985 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
30986 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
30987 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
30988 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
30989 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
30990 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
30991 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
30992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
30993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
30994 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
30995 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
30996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
30997 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
30998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
30999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
31000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
31002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
31003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31004 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
31006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
31007 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
31009 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31010 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31011 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31013 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31014 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31015 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31016 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31017 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31019 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31021 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31022 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
31024 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
31025 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
31026 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
31027 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
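  /* The floorpd256/ceilpd256/truncpd256/rintpd256 entries above (and the
     ps256 counterparts below) reuse the plain round insn codes; the ROUND_*
     value stored in the comparison slot appears to be emitted as the
     rounding-control immediate at expansion time, so as a sketch
     __builtin_ia32_floorpd256 (x) behaves like
     __builtin_ia32_roundpd256 (x, ROUND_FLOOR).  */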
31029 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
31030 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31032 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31033 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
31035 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
31036 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
31037 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
31038 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
31040 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
31041 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
31043 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
31044 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
31046 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31047 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31048 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31049 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31051 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31052 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31053 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31054 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
31055 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
31056 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
31058 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31059 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
31061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31062 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31063 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
31064 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31065 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
31067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31068 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
31070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
31072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
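  /* For the VTEST/PTEST entries the rtx_code field appears to select which
     condition of the test is returned: EQ for the ZF-based "testz" form,
     LTU for the CF-based "testc" form and GTU for "testnzc".  Assumed
     VPTEST semantics, for illustration only:
       __builtin_ia32_ptestz256 (a, b)
     yields 1 only when (a & b) is all zeros.  */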
31074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
31075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
31077 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
31078 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
31080 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
31082 /* AVX2 */
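  /* Each initializer below appears to follow the same builtin_description
     layout as the rest of these bdesc tables: ISA option mask, insn code,
     builtin name, IX86_BUILTIN_* enumerator, an optional rtx/rounding
     sub-code and the prototype index cast to int.  A hypothetical new entry
     (sketch only; these names are made up) would read:
       { OPTION_MASK_ISA_AVX2, CODE_FOR_new_pattern, "__builtin_ia32_new",
         IX86_BUILTIN_NEW, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },  */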
31083 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
31084 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
31085 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
31086 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
31087 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31088 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31089 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
31090 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
31091 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31092 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31093 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31094 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31095 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31096 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31097 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31098 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31099 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
31100 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31101 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31102 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31103 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31104 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
31105 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
31106 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31107 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31108 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31109 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31110 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31111 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31112 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31113 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31114 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31115 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31116 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31117 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31118 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31119 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31120 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31121 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
31122 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31123 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31124 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31125 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31126 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31127 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31128 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31129 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31130 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31131 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31132 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31133 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31134 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
31135 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31136 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31137 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31138 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31139 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31140 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31141 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
31142 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
31143 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
31144 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
31145 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
31146 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
31147 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31148 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31149 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31150 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31151 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31152 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31153 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
31154 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31155 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
31156 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31157 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
31158 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31159 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
31160 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31161 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31162 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31163 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31164 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31165 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31166 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31167 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31168 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31169 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
31170 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31171 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31172 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31173 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31174 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
31175 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
31176 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
31177 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
31178 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
31179 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
31180 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
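  /* In the shift entries above one insn code serves two builtins: the "...i"
     forms with a *_SI_COUNT or *_INT_COUNT prototype take the count as a
     scalar/immediate, while the matching *_V8HI_COUNT, *_V4SI_COUNT and
     *_V2DI_COUNT forms take it from the low quadword of an XMM operand.
     The *_INT_CONVERT byte shifts (pslldqi256/psrldqi256) appear to expand
     in V2TImode, i.e. each 128-bit lane is shifted as a whole.  */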
31181 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31182 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31183 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31184 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31185 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31186 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31187 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31188 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31189 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31190 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31191 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31192 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31193 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
31194 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
31195 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31196 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31197 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31198 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31199 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
31200 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
31201 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31202 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31203 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
31204 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
31205 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
31206 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
31207 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
31208 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
31209 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
31210 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
31211 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
31212 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31213 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
31214 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
31215 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
31216 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
31217 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
31218 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
31219 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31220 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31221 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31222 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31223 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31224 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31225 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
31226 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
31227 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
31228 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31230 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
31232 /* BMI */
31233 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31234 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31235 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
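  /* Assumed BEXTR semantics, for illustration only: the second operand packs
     the start bit in bits 7:0 and the field length in bits 15:8, so
       __builtin_ia32_bextr_u32 (x, (8 << 8) | 4)
     extracts an 8-bit field beginning at bit 4 of x.  __builtin_clzs and
     __builtin_ctzs operate on 16-bit values, hence UINT16_FTYPE_UINT16.  */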
31237 /* TBM */
31238 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31239 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31241 /* F16C */
31242 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
31243 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
31244 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
31245 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
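  /* The F16C conversions keep half-precision data in V8HI vectors; the
     128-bit __builtin_ia32_vcvtps2ph converts four floats (the upper four
     16-bit lanes appear to be zeroed) and its INT operand is the
     rounding-control immediate, e.g. the illustrative call
     __builtin_ia32_vcvtps2ph (v, 0) rounds to nearest even.  */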
31247 /* BMI2 */
31248 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31249 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31250 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31251 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
31252 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
31253 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
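  /* Assumed PDEP/PEXT/BZHI semantics, for illustration only:
     __builtin_ia32_pdep_si (src, mask) scatters the low-order bits of SRC
     into the bit positions set in MASK; __builtin_ia32_pext_si gathers the
     MASK-selected bits of SRC into the low-order bits of the result; and
     bzhi zeroes all bits of its first operand at and above the index given
     by the low byte of its second operand.  */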
31255 /* AVX512F */
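  /* In the "_mask" entries below the trailing vector operand appears to be
     the pass-through source and the final QI/HI operand the write mask (one
     bit per element: HI for sixteen 32-bit elements, QI for eight 64-bit
     elements); "_maskz" variants zero the unselected elements instead.
     Illustrative expansion, assuming the usual <immintrin.h> wrapper:
       _mm512_mask_add_epi32 (src, k, a, b)
         -> __builtin_ia32_paddd512_mask (a, b, src, k)  */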
31256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
31264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
31266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
31267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
31268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
31274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
31275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
31276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
31281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
31282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
31283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
31284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
31285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
31286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
31287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
31288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
31304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
31305 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
31306 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
31307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
31308 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
31309 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
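  /* The two pbroadcastq512 GPR/MEM entries above appear to use the ISA mask
     to split by target word size: OR-ing in OPTION_MASK_ISA_64BIT limits the
     general-register form to 64-bit targets, while the
     "& ~OPTION_MASK_ISA_64BIT" entry provides the memory-operand fallback
     for 32-bit targets.  */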
31310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
31344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
31345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
31346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
31347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
31348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
31349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
31350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
31351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
31352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
31353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask", IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
31356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31377 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31379 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
31380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
31381 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
31382 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31383 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31385 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31386 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31387 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31389 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
31390 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
31391 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31392 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31393 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31394 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31395 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31396 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31397 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31398 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31399 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31400 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31401 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
31402 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
31403 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31404 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31405 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31406 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31407 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
31408 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
31409 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
31410 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
31411 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
31412 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
31413 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31414 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31415 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
31416 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
31417 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31418 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31419 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
31420 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
31421 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31422 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
31423 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31424 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31425 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31426 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31427 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
31428 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
31429 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31430 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31431 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31432 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31433 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31434 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
31435 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31436 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
31437 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31438 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31439 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
31440 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
31441 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
31442 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
31443 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31444 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
31445 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
31446 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
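/* Each row in this table ties an insn pattern (CODE_FOR_*) to a builtin
   name, a builtin code and a prototype code; ix86_expand_args_builtin uses
   the prototype code (e.g. V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI above) to
   marshal the operands.  As a hedged illustration only, user code reaches
   __builtin_ia32_pternlogd512_mask through an <immintrin.h> wrapper along
   these lines (the wrapper itself is defined in the intrinsic headers, not
   here):

       #include <immintrin.h>

       __m512i
       bitwise_select (__m512i a, __m512i b, __m512i c)
       {
         // imm8 0xCA computes (a & b) | (~a & c), i.e. a per-bit select
         // with A as the selector; the wrapper passes an all-ones mask to
         // __builtin_ia32_pternlogd512_mask.
         return _mm512_ternarylogic_epi32 (a, b, c, 0xCA);
       }  */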
31448 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
31449 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
31450 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
31451 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31452 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
31453 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
31454 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
31455 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
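/* Most _mask rows in this table take the merge source and the write mask
   as the trailing operands, mirroring the ..._V8DF_QI / ..._V16SF_HI tails
   of their prototype codes.  A hedged sketch of how that convention looks
   from user code (wrapper name from the intrinsic headers, assumed here
   for illustration):

       #include <immintrin.h>

       __m512d
       masked_unpackhi (__m512d src, __mmask8 k, __m512d a, __m512d b)
       {
         // Lanes whose mask bit is clear keep the corresponding lane of
         // SRC; this wrapper expands to __builtin_ia32_unpckhpd512_mask.
         return _mm512_mask_unpackhi_pd (src, k, a, b);
       }  */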
31457 /* Mask arithmetic operations */
31458 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31459 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31460 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
31461 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31462 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31463 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31464 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
31465 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31466 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
31467 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
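/* The HImode rows above back the 16-bit mask-register (__mmask16)
   operations.  A minimal usage sketch, assuming the standard <immintrin.h>
   wrappers (defined in the intrinsic headers, not in this table):

       #include <immintrin.h>

       __mmask16
       mask_andnot (__mmask16 a, __mmask16 b)
       {
         // Computes a & ~b; _mm512_kand and _mm512_knot expand to
         // __builtin_ia32_kandhi and __builtin_ia32_knothi from the rows
         // above.
         return _mm512_kand (a, _mm512_knot (b));
       }  */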
31469 /* SHA */
31470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
31474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
31476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
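/* A hedged usage sketch for the SHA descriptors above; the user-visible
   wrappers (e.g. _mm_sha1msg1_epu32) come from shaintrin.h and reach these
   entries through their IX86_BUILTIN_* codes:

       #include <immintrin.h>

       __m128i
       sha1_schedule_step (__m128i w0, __m128i w1)
       {
         // First half of the SHA-1 message-schedule update (SHA1MSG1).
         return _mm_sha1msg1_epu32 (w0, w1);
       }  */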
31478 /* AVX512VL. */
31479 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
31480 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
31481 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
31482 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31483 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
31484 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31485 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31486 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31487 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31488 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31489 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
31490 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
31491 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
31492 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
31493 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31494 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31495 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31496 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31497 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31498 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31499 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31500 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31501 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31502 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31503 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31504 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31505 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31506 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31507 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31508 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31509 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31510 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31511 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31512 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31513 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31514 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31515 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31516 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31517 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31518 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31519 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31520 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31521 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31522 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31523 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31524 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31525 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
31526 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
31527 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31528 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
31529 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
31530 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
31531 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
31532 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
31533 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
31534 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
31535 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
31536 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31537 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31538 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31539 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31540 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31541 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31542 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
31543 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
31544 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31545 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31546 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
31547 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
31548 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
31549 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
31550 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
31551 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
31552 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
31553 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
31554 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31555 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31556 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31557 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31558 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
31559 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
31560 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
31561 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
31562 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
31563 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
31564 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
31565 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
31566 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
31567 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
31568 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
31569 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
31570 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
31571 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
31572 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
31573 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
31574 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
31575 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
31576 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
31577 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
31578 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
31579 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
31580 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31581 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
31582 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
31583 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
31584 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv4di_mask, "__builtin_ia32_pbroadcastq256_mem_mask", IX86_BUILTIN_PBROADCASTQ256_MEM_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
31585 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31586 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
31587 { OPTION_MASK_ISA_AVX512VL & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512vl_vec_dup_memv2di_mask, "__builtin_ia32_pbroadcastq128_mem_mask", IX86_BUILTIN_PBROADCASTQ128_MEM_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
31588 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
31589 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31590 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
31591 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
31592 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
31593 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
31594 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
31595 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
31596 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
31597 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
31598 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
31599 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
31600 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
31601 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
31602 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
31603 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
31604 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
31605 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
31606 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
31607 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
31608 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
31609 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
31610 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
31611 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
31612 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
31613 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
31614 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
31615 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
31616 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
31617 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
31618 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
31619 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
31620 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
31621 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
31622 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
31623 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31624 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31625 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31626 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31627 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31628 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31629 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31630 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31631 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31632 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31633 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31634 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31635 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31636 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31637 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31638 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31639 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31640 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31641 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31642 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31643 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31644 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31645 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31646 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31647 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31648 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31649 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31650 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31651 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31652 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31653 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31654 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31655 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31656 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31657 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31658 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31659 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31660 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31661 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31662 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31663 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31664 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31665 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31666 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31667 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31668 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31669 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31670 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31671 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31672 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31673 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31674 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31675 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31676 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31677 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
31678 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
31679 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
31680 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
31681 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
31682 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
31683 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
31684 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
31685 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
31686 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
31687 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
31688 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
31689 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
31690 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
31691 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
31692 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
31693 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
31694 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
31695 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
31696 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
31697 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
31698 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
31699 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
31700 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
31701 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
31702 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
31703 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
31704 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
31705 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
31706 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
31707 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
31708 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
31709 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
31710 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
31711 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
31712 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
31713 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
31714 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
31715 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
31716 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
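  /* VRANGEPD/VRANGEPS (AVX512DQ, 128/256-bit VL forms).  */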
31717 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
31718 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
31719 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
31720 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
31721 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31722 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31723 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31724 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
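  /* VFIXUPIMMPD/VFIXUPIMMPS: unmasked, masked and zero-masked 128/256-bit forms.  */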
31725 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df, "__builtin_ia32_fixupimmpd256", IX86_BUILTIN_FIXUPIMMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
31726 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
31727 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
31728 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf, "__builtin_ia32_fixupimmps256", IX86_BUILTIN_FIXUPIMMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
31729 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
31730 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
31731 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df, "__builtin_ia32_fixupimmpd128", IX86_BUILTIN_FIXUPIMMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
31732 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
31733 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
31734 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf, "__builtin_ia32_fixupimmps128", IX86_BUILTIN_FIXUPIMMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
31735 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
31736 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
31737 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
31738 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
31739 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
31740 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
31741 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31742 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31743 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31744 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31745 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31746 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31747 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31748 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31749 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31750 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31751 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31752 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31753 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31754 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31755 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31756 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31757 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31758 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31759 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
31760 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
31761 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
31762 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31763 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31764 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31765 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
31766 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
31767 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
31768 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
31769 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
31770 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
31771 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
31772 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31773 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
31774 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
31775 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask_1, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
31776 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask_1, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31777 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
31778 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
31779 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31780 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31781 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
31782 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31783 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
31784 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
31785 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
31786 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31787 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
31788 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
31789 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31790 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31791 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31792 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31793 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31794 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31795 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31796 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31797 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31798 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31799 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31800 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31801 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31802 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31803 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask", IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
31804 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask", IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
31805 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask", IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
31806 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask", IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
31807 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
31808 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
31809 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
31810 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
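  /* VPTERNLOGD/VPTERNLOGQ ternary logic, masked and zero-masked.  */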
31811 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
31812 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
31813 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
31814 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
31815 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
31816 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
31817 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
31818 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
31819 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31820 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31821 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31822 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
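  /* Masked FMA variants: VFMADD, VFMSUB, VFNMADD, VFNMSUB, VFMADDSUB, VFMSUBADD.  */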
31823 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31824 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31825 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31826 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31827 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31828 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31829 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31830 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31831 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31832 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31833 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31834 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31835 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31836 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31837 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31838 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31839 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31840 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31841 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31842 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31843 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31844 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31845 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31846 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31847 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31848 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31849 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31850 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31851 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31852 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31853 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31854 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31855 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31856 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31857 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31858 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31859 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31860 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31861 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31862 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31863 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
31864 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
31865 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
31866 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
31867 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
31868 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
31869 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31870 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31871 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
31872 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
31873 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
31874 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
31875 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
31876 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31877 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
31878 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
31879 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
31880 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
31881 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
31882 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
31883 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
31884 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
31885 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
31886 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
31887 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
31888 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
31889 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
31890 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
31891 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31892 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31893 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
31894 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
31895 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
31896 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
31897 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
31898 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
31899 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
31900 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
31901 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
31902 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
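  /* Two-source permutes: VPERMT2D/Q/PS/PD and VPERMI2 variants.  */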
31903 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31904 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31905 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31906 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31907 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31908 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31909 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
31910 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
31911 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
31912 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
31913 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
31914 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
31915 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31916 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31917 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31918 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31919 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31920 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31921 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
31922 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
31923 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
31924 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
31925 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
31926 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
31927 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
31928 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
31929 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
31930 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
31931 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
31932 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
31933 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
31934 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
31935 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
31936 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
31937 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
31938 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
31939 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31940 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31941 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
31942 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
31943 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31944 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31945 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
31946 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
31947 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31948 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31949 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31950 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31951 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31952 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31953 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31954 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31955 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
31956 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
31957 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31958 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31959 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
31960 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
31961 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
31962 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31963 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
31964 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
31965 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
31966 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
31967 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31968 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
31969 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
31970 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
31971 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31972 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
31973 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
31974 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
31975 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
31976 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
31977 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
31978 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
31979 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
31980 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
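  /* Vector-to-mask conversions: VPMOVB2M/VPMOVW2M (AVX512BW), VPMOVD2M/VPMOVQ2M (AVX512DQ).  */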
31981 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
31982 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
31983 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
31984 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
31985 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
31986 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
31987 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
31988 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
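  /* Mask-to-vector conversions: VPMOVM2B/VPMOVM2W (AVX512BW), VPMOVM2D/VPMOVM2Q (AVX512DQ).  */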
31989 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
31990 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
31991 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
31992 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
31993 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
31994 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
31995 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
31996 { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
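  /* VPCMPEQ/VPCMPGT compares writing mask registers.  */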
31997 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
31998 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
31999 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32000 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32001 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32002 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32003 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32004 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32005 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32006 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32007 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32008 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32009 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32010 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32011 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32012 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
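  /* VPTESTM/VPTESTNM bitwise tests producing mask results.  */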
32013 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32014 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32015 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32016 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32017 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32018 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32019 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32020 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32021 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
32022 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
32023 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
32024 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
32025 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
32026 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
32027 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
32028 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
32029 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
32030 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
32031 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
32032 { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
32033 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32034 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32035 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32036 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32037 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32038 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32039 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32040 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32041 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32042 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32043 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32044 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32045 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32046 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32047 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32048 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32049 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32050 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32051 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32052 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32053 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32054 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32055 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32056 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
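/* The *_maskz expand variants above share the merge-form prototypes but are
   expected to zero the elements whose mask bit is clear instead of copying
   them from the pass-through operand.  */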
32057 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32058 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32059 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32060 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32061 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32062 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32063 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32064 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32065 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32066 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32067 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32068 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32069 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32070 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32071 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32072 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32073 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32074 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32075 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32076 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32077 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32078 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32079 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32080 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32081 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32082 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32083 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32084 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32085 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32086 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32087 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32088 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32089 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32090 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32091 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32092 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32093 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32094 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32095 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32096 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32097 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32098 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32099 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask, "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32100 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32101 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32102 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32103 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32104 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask, "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
32105 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
32106 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
32107 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
32108 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
32109 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask, "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
32110 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask, "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
32111 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
32112 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
32113 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32114 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32115 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32116 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32117 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32118 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32119 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
32120 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32121 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32122 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32123 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32124 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32125 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32126 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32127 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32128 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32129 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32130 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32131 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask", IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32132 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask", IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32133 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask", IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
32134 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask", IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
32135 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32136 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
32137 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
32138 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
32139 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32140 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32141 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32142 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32143 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32144 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32145 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32146 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
32147 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
32148 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
32149 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
32150 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32151 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32152 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32153 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32154 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32155 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32156 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32157 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32158 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32159 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32160 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32161 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32162 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32163 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32164 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32165 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
32166 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32167 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
32168 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32169 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
32170 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32171 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
32172 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
32173 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
32174 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
32175 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
32176 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
32177 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32178 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32179 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
32180 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
32181 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32182 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32183 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
32184 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
32185 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
32186 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
32187 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32188 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32189 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
32190 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
32191 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32192 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32193 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
32194 { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
32195 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
32196 { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },
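/* Entry layout throughout these tables: ISA option mask, insn code, builtin
   name, IX86_BUILTIN enumerator, rtx comparison code (UNKNOWN when the
   predicate, if any, comes from an immediate), and the prototype enumerator.
   In the masked prototypes the trailing QI/HI/SI/DI operand is the write
   mask, one bit per vector element; for the cmp/ucmp builtins the INT
   operand is the comparison predicate immediate, with ucmp treating the
   elements as unsigned.  */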
32198 /* AVX512DQ. */
32199 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
32200 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
32201 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
32202 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
32203 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
32204 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
32205 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
32206 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
32207 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
32208 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
32209 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
32210 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
32211 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
32212 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32213 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32214 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32215 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32216 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32217 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32218 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
32219 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
32220 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
32221 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
32222 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
32223 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
32224 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
32225 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
32226 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
32227 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
32228 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
32229 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },
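/* The cvt*2mask and cvtmask2* entries above map between vector registers and
   mask (k) registers: cvtd2mask/cvtq2mask appear to collect the sign bit of
   each element into a mask bit, while cvtmask2d/cvtmask2q broadcast each mask
   bit to an all-ones or all-zeros element (VPMOVD2M/VPMOVM2D and the
   quadword forms).  */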
32231 /* AVX512BW. */
32232 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
32233 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
32234 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32235 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32236 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
32237 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
32238 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
32239 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
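/* For the *_CONVERT prototypes above (pslldq512, psrldq512, palignr512) the
   insn pattern operates on a different mode (V4TI/V64QI) than the V8DI
   builtin signature; the expander is expected to reinterpret the operands
   via lowpart subregs rather than convert their values.  */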
32240 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32241 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32242 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
32243 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
32244 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
32245 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
32246 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
32247 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
32248 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32249 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
32250 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32251 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32252 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32253 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32254 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32255 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32256 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32257 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32258 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32259 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32260 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32261 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32262 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32263 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32264 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32265 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32266 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32267 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32268 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32269 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32270 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32271 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32272 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32273 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32274 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32275 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32276 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512vl_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32277 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512vl_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32278 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512vl_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
32279 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32280 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32281 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32282 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32283 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32284 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32285 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32286 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
32287 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32288 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
32289 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
32290 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32291 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32292 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32293 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32294 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32295 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
32296 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32297 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32298 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32299 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32300 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
32301 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
32302 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
32303 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
32304 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
32305 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
32306 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32307 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32308 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32309 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32310 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32311 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32312 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
32313 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
32314 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
32315 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32316 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32317 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
32318 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
32319 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32320 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32321 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
32322 { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
32323 };
32325 /* Builtins with rounding support. */
32326 static const struct builtin_description bdesc_round_args[] =
32327 {
32328 /* AVX512F */
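/* In each prototype in this table the final INT operand carries the embedded
   rounding / suppress-all-exceptions control supplied by the intrinsic
   (e.g. _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); expansion of these
   builtins is expected to reject values that are not valid for the
   instruction.  */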
32329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
32334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
32335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
32336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
32337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
32338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
32339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
32342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
32344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
32346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
32348 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
32349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
32350 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
32351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
32352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
32354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
32356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
32357 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
32358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
32359 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
32360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
32366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
32368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
32370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
32372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32377 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32379 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32381 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32382 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32383 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32385 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32386 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32387 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32389 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32390 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32391 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32392 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
32393 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
32394 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32395 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32396 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32397 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32398 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32399 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32400 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32401 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32402 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32403 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32404 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32405 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32406 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32407 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32408 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32409 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32410 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32411 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32412 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32413 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32414 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32415 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32416 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
32417 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
32418 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
32419 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
32420 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
32421 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
32422 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
32423 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
32424 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32425 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32426 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32427 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32428 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32429 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32430 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
32431 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
32432 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32433 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32434 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32435 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32436 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32437 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32438 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32439 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32440 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32441 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32442 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32443 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32444 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32445 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
32446 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32447 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
32449 /* AVX512ER */
32450 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32451 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32452 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32453 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32454 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32455 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32456 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
32457 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
32458 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
32459 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
32461 /* AVX512DQ. */
32462 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
32463 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
32464 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32465 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32466 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32467 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32468 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32469 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
32470 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32471 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
32472 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32473 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
32474 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32475 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
32476 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
32477 { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
32478 };
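/* Illustrative sketch (not part of this file): the trailing INT operand of
   the *_round builtins above is the embedded-rounding / suppress-all-
   exceptions immediate.  Assuming the usual <immintrin.h> wrappers and
   -mavx512f, a use looks roughly like:

     #include <immintrin.h>

     __m512d
     add_round_nearest (__m512d a, __m512d b)
     {
       return _mm512_add_round_pd (a, b,
                                   _MM_FROUND_TO_NEAREST_INT
                                   | _MM_FROUND_NO_EXC);
     }

   which expands to __builtin_ia32_addpd512_mask with an all-ones mask.  */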
32480 /* FMA4 and XOP. */
32481 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
32482 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
32483 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
32484 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
32485 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
32486 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
32487 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
32488 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
32489 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
32490 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
32491 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
32492 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
32493 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
32494 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
32495 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
32496 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
32497 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
32498 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
32499 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
32500 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
32501 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
32502 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
32503 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
32504 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
32505 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
32506 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
32507 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
32508 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
32509 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
32510 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
32511 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
32512 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
32513 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
32514 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
32515 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
32516 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
32517 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
32518 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
32519 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
32520 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
32521 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
32522 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
32523 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
32524 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
32525 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
32526 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
32527 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
32528 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
32529 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
32530 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
32531 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
32532 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
32534 static const struct builtin_description bdesc_multi_arg[] =
32535 {
32536 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
32537 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
32538 UNKNOWN, (int)MULTI_ARG_3_SF },
32539 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
32540 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
32541 UNKNOWN, (int)MULTI_ARG_3_DF },
32543 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
32544 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
32545 UNKNOWN, (int)MULTI_ARG_3_SF },
32546 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
32547 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
32548 UNKNOWN, (int)MULTI_ARG_3_DF },
32550 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
32551 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
32552 UNKNOWN, (int)MULTI_ARG_3_SF },
32553 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
32554 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
32555 UNKNOWN, (int)MULTI_ARG_3_DF },
32556 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
32557 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
32558 UNKNOWN, (int)MULTI_ARG_3_SF2 },
32559 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
32560 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
32561 UNKNOWN, (int)MULTI_ARG_3_DF2 },
32563 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
32564 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
32565 UNKNOWN, (int)MULTI_ARG_3_SF },
32566 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
32567 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
32568 UNKNOWN, (int)MULTI_ARG_3_DF },
32569 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
32570 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
32571 UNKNOWN, (int)MULTI_ARG_3_SF2 },
32572 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
32573 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
32574 UNKNOWN, (int)MULTI_ARG_3_DF2 },
32576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
32577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
32578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
32579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
32580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
32581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
32582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
32584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
32585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
32586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
32587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
32588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
32589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
32590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
32592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
32594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
32595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
32596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
32597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
32598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
32599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
32600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
32601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
32602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
32603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
32604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
32605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
32607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
32608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
32609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
32610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
32611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
32612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
32613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
32614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
32615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
32616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
32617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
32618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
32619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
32620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
32621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
32622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
32624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
32625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
32626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
32627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
32628 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
32629 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
32631 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
32632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
32633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
32634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
32635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
32636 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
32637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
32638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
32639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
32640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
32641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
32642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
32643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
32644 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
32645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
32647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
32648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
32649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
32650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
32651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
32652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
32653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
32655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
32656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
32657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
32658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
32659 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
32660 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
32661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
32663 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
32664 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
32665 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
32666 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
32667 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
32668 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
32669 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
32671 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
32672 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
32673 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
32674 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
32675 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
32676 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
32677 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
32679 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
32680 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
32681 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
32682 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
32683 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
32684 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
32685 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
32687 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
32688 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
32689 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
32690 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
32691 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
32692 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
32693 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
32695 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
32696 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
32697 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
32698 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
32699 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
32700 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
32701 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
32703 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
32704 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
32705 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
32706 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
32707 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
32708 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
32709 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
32711 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
32712 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
32713 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
32714 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
32715 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
32716 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
32717 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
32718 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
32720 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
32721 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
32722 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
32723 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
32724 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
32725 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
32726 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
32727 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
32729 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
32730 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
32731 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
32732 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
32733 };
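/* Illustrative sketch (not part of this file): the vpcom* rows above all
   share one insn pattern per element width; the rtx comparison code in
   each entry (EQ, NE, LT, ...) selects the condition the expander plugs
   into that pattern.  Assuming the XOP wrappers from <x86intrin.h> and
   -mxop, __builtin_ia32_vpcomeqb is reached roughly as:

     #include <x86intrin.h>

     __m128i
     bytes_equal_xop (__m128i a, __m128i b)
     {
       return _mm_comeq_epi8 (a, b);
     }
*/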
32736 /* TM vector builtins. */
32738 /* Reuse the existing x86-specific `struct builtin_description' because
32739    we're lazy.  Add casts to make them fit.  */
32740 static const struct builtin_description bdesc_tm[] =
32741 {
32742 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
32743 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
32744 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
32745 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
32746 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
32747 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
32748 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
32750 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
32751 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
32752 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
32753 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
32754 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
32755 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
32756 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
32758 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
32759 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
32760 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
32761 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
32762 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
32763 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
32764 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
32766 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
32767 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
32768 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
32769 };
32771 /* TM callbacks. */
32773 /* Return the builtin decl needed to load a vector of TYPE. */
32775 static tree
32776 ix86_builtin_tm_load (tree type)
32777 {
32778 if (TREE_CODE (type) == VECTOR_TYPE)
32779 {
32780 switch (tree_to_uhwi (TYPE_SIZE (type)))
32781 {
32782 case 64:
32783 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
32784 case 128:
32785 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
32786 case 256:
32787 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
32788 }
32789 }
32790 return NULL_TREE;
32791 }
32793 /* Return the builtin decl needed to store a vector of TYPE. */
32795 static tree
32796 ix86_builtin_tm_store (tree type)
32797 {
32798 if (TREE_CODE (type) == VECTOR_TYPE)
32799 {
32800 switch (tree_to_uhwi (TYPE_SIZE (type)))
32801 {
32802 case 64:
32803 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
32804 case 128:
32805 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
32806 case 256:
32807 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
32808 }
32809 }
32810 return NULL_TREE;
32811 }
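/* Sketch of how these callbacks are used (the defines below are an
   assumption, not shown in this excerpt): they are installed as the
   vectorizer's transactional-memory hooks elsewhere in this file, roughly

     #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
     #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
     #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
     #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

   so that trans-mem instrumentation can pick a 64/128/256-bit
   transactional load or store matching the vector type it rewrites.  */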
32813 /* Initialize the transactional memory vector load/store builtins. */
32815 static void
32816 ix86_init_tm_builtins (void)
32817 {
32818 enum ix86_builtin_func_type ftype;
32819 const struct builtin_description *d;
32820 size_t i;
32821 tree decl;
32822 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
32823 tree attrs_log, attrs_type_log;
32825 if (!flag_tm)
32826 return;
32828 /* If there are no builtins defined, we must be compiling in a
32829 language without trans-mem support. */
32830 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
32831 return;
32833 /* Use whatever attributes a normal TM load has. */
32834 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
32835 attrs_load = DECL_ATTRIBUTES (decl);
32836 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
32837 /* Use whatever attributes a normal TM store has. */
32838 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
32839 attrs_store = DECL_ATTRIBUTES (decl);
32840 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
32841 /* Use whatever attributes a normal TM log has. */
32842 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
32843 attrs_log = DECL_ATTRIBUTES (decl);
32844 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
32846 for (i = 0, d = bdesc_tm;
32847 i < ARRAY_SIZE (bdesc_tm);
32848 i++, d++)
32849 {
32850 if ((d->mask & ix86_isa_flags) != 0
32851 || (lang_hooks.builtin_function
32852 == lang_hooks.builtin_function_ext_scope))
32853 {
32854 tree type, attrs, attrs_type;
32855 enum built_in_function code = (enum built_in_function) d->code;
32857 ftype = (enum ix86_builtin_func_type) d->flag;
32858 type = ix86_get_builtin_func_type (ftype);
32860 if (BUILTIN_TM_LOAD_P (code))
32861 {
32862 attrs = attrs_load;
32863 attrs_type = attrs_type_load;
32864 }
32865 else if (BUILTIN_TM_STORE_P (code))
32866 {
32867 attrs = attrs_store;
32868 attrs_type = attrs_type_store;
32869 }
32870 else
32871 {
32872 attrs = attrs_log;
32873 attrs_type = attrs_type_log;
32874 }
32875 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
32876 /* The name without the "__builtin_" prefix,
32877 for calling the builtin directly. */
32878 d->name + strlen ("__builtin_"),
32879 attrs);
32880 /* add_builtin_function () has set the DECL_ATTRIBUTES; now
32881 set the TYPE_ATTRIBUTES as well. */
32882 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
32884 set_builtin_decl (code, decl, false);
32885 }
32886 }
32887 }
32889 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
32890 not in the current target ISA, so that the user can compile particular
32891 modules with target-specific options that differ from the command-line
32892 options. */
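/* Illustrative sketch (not part of this file): because the builtins are
   registered even for ISAs not enabled on the command line, a translation
   unit compiled without -mavx2 can still use AVX2 intrinsics in a function
   that overrides the target:

     #include <immintrin.h>

     __attribute__ ((target ("avx2")))
     __m256i
     add8x32 (__m256i a, __m256i b)
     {
       return _mm256_add_epi32 (a, b);
     }
*/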
32893 static void
32894 ix86_init_mmx_sse_builtins (void)
32895 {
32896 const struct builtin_description * d;
32897 enum ix86_builtin_func_type ftype;
32898 size_t i;
32900 /* Add all special builtins with a variable number of operands. */
32901 for (i = 0, d = bdesc_special_args;
32902 i < ARRAY_SIZE (bdesc_special_args);
32903 i++, d++)
32904 {
32905 if (d->name == 0)
32906 continue;
32908 ftype = (enum ix86_builtin_func_type) d->flag;
32909 def_builtin (d->mask, d->name, ftype, d->code);
32910 }
32912 /* Add all builtins with a variable number of operands. */
32913 for (i = 0, d = bdesc_args;
32914 i < ARRAY_SIZE (bdesc_args);
32915 i++, d++)
32916 {
32917 if (d->name == 0)
32918 continue;
32920 ftype = (enum ix86_builtin_func_type) d->flag;
32921 def_builtin_const (d->mask, d->name, ftype, d->code);
32922 }
32924 /* Add all builtins with rounding. */
32925 for (i = 0, d = bdesc_round_args;
32926 i < ARRAY_SIZE (bdesc_round_args);
32927 i++, d++)
32928 {
32929 if (d->name == 0)
32930 continue;
32932 ftype = (enum ix86_builtin_func_type) d->flag;
32933 def_builtin_const (d->mask, d->name, ftype, d->code);
32934 }
32936 /* pcmpestr[im] insns. */
32937 for (i = 0, d = bdesc_pcmpestr;
32938 i < ARRAY_SIZE (bdesc_pcmpestr);
32939 i++, d++)
32941 if (d->code == IX86_BUILTIN_PCMPESTRM128)
32942 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
32943 else
32944 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
32945 def_builtin_const (d->mask, d->name, ftype, d->code);
32948 /* pcmpistr[im] insns. */
32949 for (i = 0, d = bdesc_pcmpistr;
32950 i < ARRAY_SIZE (bdesc_pcmpistr);
32951 i++, d++)
32953 if (d->code == IX86_BUILTIN_PCMPISTRM128)
32954 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
32955 else
32956 ftype = INT_FTYPE_V16QI_V16QI_INT;
32957 def_builtin_const (d->mask, d->name, ftype, d->code);
32960 /* comi/ucomi insns. */
32961 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
32963 if (d->mask == OPTION_MASK_ISA_SSE2)
32964 ftype = INT_FTYPE_V2DF_V2DF;
32965 else
32966 ftype = INT_FTYPE_V4SF_V4SF;
32967 def_builtin_const (d->mask, d->name, ftype, d->code);
32970 /* SSE */
32971 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
32972 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
32973 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
32974 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
32976 /* SSE or 3DNow!A */
32977 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
32978 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
32979 IX86_BUILTIN_MASKMOVQ);
32981 /* SSE2 */
32982 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
32983 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
32985 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
32986 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
32987 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
32988 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
32990 /* SSE3. */
32991 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
32992 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
32993 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
32994 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
32996 /* AES */
32997 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
32998 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
32999 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33000 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33001 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33002 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33003 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33004 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33005 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33006 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33007 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33008 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33010 /* PCLMUL */
33011 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33012 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33014 /* RDRND */
33015 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33016 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33017 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33018 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33019 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33020 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33021 IX86_BUILTIN_RDRAND64_STEP);
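/* Illustrative use from user code (not part of this file): each *_step
   builtin stores a random value through its pointer argument and returns
   nonzero on success, so a caller typically retries:

	unsigned int r;
	while (!__builtin_ia32_rdrand32_step (&r))
	  ;  */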
33023 /* AVX2 */
33024 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33025 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33026 IX86_BUILTIN_GATHERSIV2DF);
33028 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33029 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33030 IX86_BUILTIN_GATHERSIV4DF);
33032 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33033 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33034 IX86_BUILTIN_GATHERDIV2DF);
33036 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33037 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33038 IX86_BUILTIN_GATHERDIV4DF);
33040 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33041 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33042 IX86_BUILTIN_GATHERSIV4SF);
33044 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33045 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33046 IX86_BUILTIN_GATHERSIV8SF);
33048 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33049 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33050 IX86_BUILTIN_GATHERDIV4SF);
33052 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33053 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33054 IX86_BUILTIN_GATHERDIV8SF);
33056 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33057 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33058 IX86_BUILTIN_GATHERSIV2DI);
33060 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33061 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33062 IX86_BUILTIN_GATHERSIV4DI);
33064 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33065 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33066 IX86_BUILTIN_GATHERDIV2DI);
33068 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33069 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33070 IX86_BUILTIN_GATHERDIV4DI);
33072 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33073 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33074 IX86_BUILTIN_GATHERSIV4SI);
33076 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33077 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33078 IX86_BUILTIN_GATHERSIV8SI);
33080 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33081 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33082 IX86_BUILTIN_GATHERDIV4SI);
33084 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
33085 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
33086 IX86_BUILTIN_GATHERDIV8SI);
33088 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
33089 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
33090 IX86_BUILTIN_GATHERALTSIV4DF);
33092 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
33093 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
33094 IX86_BUILTIN_GATHERALTDIV8SF);
33096 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
33097 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
33098 IX86_BUILTIN_GATHERALTSIV4DI);
33100 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
33101 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
33102 IX86_BUILTIN_GATHERALTDIV8SI);
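/* All of the gather builtins above share the operand order
   (merge/source vector, base pointer, index vector, mask vector, scale).
   As an assumed illustration from the avx2intrin.h wrappers of this era,
   _mm256_i32gather_pd (base, idx, 8) expands roughly to

	__builtin_ia32_gathersiv4df (src, base, idx, mask, 8)

   with an all-ones mask and an undefined source vector.  */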
33104 /* AVX512F */
33105 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
33106 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
33107 IX86_BUILTIN_GATHER3SIV16SF);
33109 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
33110 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
33111 IX86_BUILTIN_GATHER3SIV8DF);
33113 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
33114 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
33115 IX86_BUILTIN_GATHER3DIV16SF);
33117 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
33118 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
33119 IX86_BUILTIN_GATHER3DIV8DF);
33121 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
33122 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
33123 IX86_BUILTIN_GATHER3SIV16SI);
33125 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
33126 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
33127 IX86_BUILTIN_GATHER3SIV8DI);
33129 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
33130 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
33131 IX86_BUILTIN_GATHER3DIV16SI);
33133 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
33134 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
33135 IX86_BUILTIN_GATHER3DIV8DI);
33137 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df",
33138 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
33139 IX86_BUILTIN_GATHER3ALTSIV8DF);
33141 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf",
33142 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
33143 IX86_BUILTIN_GATHER3ALTDIV16SF);
33145 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di",
33146 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
33147 IX86_BUILTIN_GATHER3ALTSIV8DI);
33149 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si",
33150 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
33151 IX86_BUILTIN_GATHER3ALTDIV16SI);
33153 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
33154 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
33155 IX86_BUILTIN_SCATTERSIV16SF);
33157 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
33158 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
33159 IX86_BUILTIN_SCATTERSIV8DF);
33161 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
33162 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
33163 IX86_BUILTIN_SCATTERDIV16SF);
33165 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
33166 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
33167 IX86_BUILTIN_SCATTERDIV8DF);
33169 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
33170 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
33171 IX86_BUILTIN_SCATTERSIV16SI);
33173 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
33174 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
33175 IX86_BUILTIN_SCATTERSIV8DI);
33177 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
33178 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
33179 IX86_BUILTIN_SCATTERDIV16SI);
33181 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
33182 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
33183 IX86_BUILTIN_SCATTERDIV8DI);
33185 /* AVX512VL */
33186 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
33187 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
33188 IX86_BUILTIN_GATHER3SIV2DF);
33190 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
33191 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
33192 IX86_BUILTIN_GATHER3SIV4DF);
33194 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
33195 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
33196 IX86_BUILTIN_GATHER3DIV2DF);
33198 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
33199 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
33200 IX86_BUILTIN_GATHER3DIV4DF);
33202 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
33203 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
33204 IX86_BUILTIN_GATHER3SIV4SF);
33206 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
33207 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
33208 IX86_BUILTIN_GATHER3SIV8SF);
33210 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
33211 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
33212 IX86_BUILTIN_GATHER3DIV4SF);
33214 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
33215 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
33216 IX86_BUILTIN_GATHER3DIV8SF);
33218 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
33219 V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
33220 IX86_BUILTIN_GATHER3SIV2DI);
33222 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
33223 V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
33224 IX86_BUILTIN_GATHER3SIV4DI);
33226 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
33227 V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
33228 IX86_BUILTIN_GATHER3DIV2DI);
33230 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
33231 V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
33232 IX86_BUILTIN_GATHER3DIV4DI);
33234 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
33235 V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
33236 IX86_BUILTIN_GATHER3SIV4SI);
33238 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
33239 V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
33240 IX86_BUILTIN_GATHER3SIV8SI);
33242 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
33243 V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
33244 IX86_BUILTIN_GATHER3DIV4SI);
33246 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
33247 V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
33248 IX86_BUILTIN_GATHER3DIV8SI);
33250 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df",
33251 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
33252 IX86_BUILTIN_GATHER3ALTSIV4DF);
33254 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf",
33255 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
33256 IX86_BUILTIN_GATHER3ALTDIV8SF);
33258 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di",
33259 V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
33260 IX86_BUILTIN_GATHER3ALTSIV4DI);
33262 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si",
33263 V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
33264 IX86_BUILTIN_GATHER3ALTDIV8SI);
33266 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
33267 VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
33268 IX86_BUILTIN_SCATTERSIV8SF);
33270 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
33271 VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
33272 IX86_BUILTIN_SCATTERSIV4SF);
33274 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
33275 VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
33276 IX86_BUILTIN_SCATTERSIV4DF);
33278 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
33279 VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
33280 IX86_BUILTIN_SCATTERSIV2DF);
33282 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
33283 VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
33284 IX86_BUILTIN_SCATTERDIV8SF);
33286 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
33287 VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
33288 IX86_BUILTIN_SCATTERDIV4SF);
33290 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
33291 VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
33292 IX86_BUILTIN_SCATTERDIV4DF);
33294 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
33295 VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
33296 IX86_BUILTIN_SCATTERDIV2DF);
33298 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
33299 VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
33300 IX86_BUILTIN_SCATTERSIV8SI);
33302 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
33303 VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
33304 IX86_BUILTIN_SCATTERSIV4SI);
33306 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
33307 VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
33308 IX86_BUILTIN_SCATTERSIV4DI);
33310 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
33311 VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
33312 IX86_BUILTIN_SCATTERSIV2DI);
33314 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
33315 VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
33316 IX86_BUILTIN_SCATTERDIV8SI);
33318 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
33319 VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
33320 IX86_BUILTIN_SCATTERDIV4SI);
33322 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
33323 VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
33324 IX86_BUILTIN_SCATTERDIV4DI);
33326 def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
33327 VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
33328 IX86_BUILTIN_SCATTERDIV2DI);
33330 /* AVX512PF */
33331 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
33332 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33333 IX86_BUILTIN_GATHERPFDPD);
33334 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
33335 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33336 IX86_BUILTIN_GATHERPFDPS);
33337 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
33338 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33339 IX86_BUILTIN_GATHERPFQPD);
33340 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
33341 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33342 IX86_BUILTIN_GATHERPFQPS);
33343 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
33344 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
33345 IX86_BUILTIN_SCATTERPFDPD);
33346 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
33347 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
33348 IX86_BUILTIN_SCATTERPFDPS);
33349 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
33350 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
33351 IX86_BUILTIN_SCATTERPFQPD);
33352 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
33353 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
33354 IX86_BUILTIN_SCATTERPFQPS);
33356 /* SHA */
33357 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
33358 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
33359 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
33360 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
33361 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
33362 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
33363 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
33364 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
33365 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
33366 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
33367 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
33368 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
33369 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
33370 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
33372 /* RTM. */
33373 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
33374 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
33376 /* MMX access to the vec_init patterns. */
33377 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
33378 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
33380 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
33381 V4HI_FTYPE_HI_HI_HI_HI,
33382 IX86_BUILTIN_VEC_INIT_V4HI);
33384 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
33385 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
33386 IX86_BUILTIN_VEC_INIT_V8QI);
33388 /* Access to the vec_extract patterns. */
33389 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
33390 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
33391 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
33392 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
33393 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
33394 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
33395 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
33396 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
33397 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
33398 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
33400 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33401 "__builtin_ia32_vec_ext_v4hi",
33402 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
33404 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
33405 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
33407 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
33408 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
33410 /* Access to the vec_set patterns. */
33411 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
33412 "__builtin_ia32_vec_set_v2di",
33413 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
33415 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
33416 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
33418 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
33419 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
33421 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
33422 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
33424 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33425 "__builtin_ia32_vec_set_v4hi",
33426 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
33428 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
33429 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
33431 /* RDSEED */
33432 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
33433 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
33434 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
33435 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
33436 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
33437 "__builtin_ia32_rdseed_di_step",
33438 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
33440 /* ADCX */
33441 def_builtin (0, "__builtin_ia32_addcarryx_u32",
33442 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
33443 def_builtin (OPTION_MASK_ISA_64BIT,
33444 "__builtin_ia32_addcarryx_u64",
33445 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
33446 IX86_BUILTIN_ADDCARRYX64);
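/* Illustrative use (not part of this file): the builtin takes the
   incoming carry, two addends and a pointer for the sum, and returns the
   outgoing carry:

	unsigned int sum;
	unsigned char c = __builtin_ia32_addcarryx_u32 (0, a, b, &sum);  */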
33448 /* Read/write FLAGS. */
33449 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
33450 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
33451 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
33452 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
33453 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
33454 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
33455 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
33456 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
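/* Illustrative use (not part of this file):

	unsigned int flags = __builtin_ia32_readeflags_u32 ();
	__builtin_ia32_writeeflags_u32 (flags);

   The _u64 variants carry OPTION_MASK_ISA_64BIT and so are only defined
   for 64-bit targets.  */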
33458 /* CLFLUSHOPT. */
33459 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
33460 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
33462 /* Add FMA4 multi-arg argument instructions */
33463 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
33465 if (d->name == 0)
33466 continue;
33468 ftype = (enum ix86_builtin_func_type) d->flag;
33469 def_builtin_const (d->mask, d->name, ftype, d->code);
33473 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
33474 to return a pointer to VERSION_DECL if the outcome of the expression
33475 formed by PREDICATE_CHAIN is true. This function will be called during
33476 version dispatch to decide which function version to execute. It returns
33477 the basic block at the end, to which more conditions can be added. */
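/* Illustratively, for a version foo.arch_corei7 guarded by the single
   predicate __builtin_cpu_is ("corei7"), the code appended to NEW_BB
   looks roughly like:

	cond = __builtin_cpu_is ("corei7");
	if (cond > 0) goto bb_true; else goto bb_next;
      bb_true:
	ret = (void *) &foo.arch_corei7;
	return ret;
      bb_next:
	<conditions for the next version are appended here>  */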
33479 static basic_block
33480 add_condition_to_bb (tree function_decl, tree version_decl,
33481 tree predicate_chain, basic_block new_bb)
33483 gimple return_stmt;
33484 tree convert_expr, result_var;
33485 gimple convert_stmt;
33486 gimple call_cond_stmt;
33487 gimple if_else_stmt;
33489 basic_block bb1, bb2, bb3;
33490 edge e12, e23;
33492 tree cond_var, and_expr_var = NULL_TREE;
33493 gimple_seq gseq;
33495 tree predicate_decl, predicate_arg;
33497 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
33499 gcc_assert (new_bb != NULL);
33500 gseq = bb_seq (new_bb);
33503 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
33504 build_fold_addr_expr (version_decl));
33505 result_var = create_tmp_var (ptr_type_node, NULL);
33506 convert_stmt = gimple_build_assign (result_var, convert_expr);
33507 return_stmt = gimple_build_return (result_var);
33509 if (predicate_chain == NULL_TREE)
33511 gimple_seq_add_stmt (&gseq, convert_stmt);
33512 gimple_seq_add_stmt (&gseq, return_stmt);
33513 set_bb_seq (new_bb, gseq);
33514 gimple_set_bb (convert_stmt, new_bb);
33515 gimple_set_bb (return_stmt, new_bb);
33516 pop_cfun ();
33517 return new_bb;
33520 while (predicate_chain != NULL)
33522 cond_var = create_tmp_var (integer_type_node, NULL);
33523 predicate_decl = TREE_PURPOSE (predicate_chain);
33524 predicate_arg = TREE_VALUE (predicate_chain);
33525 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
33526 gimple_call_set_lhs (call_cond_stmt, cond_var);
33528 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
33529 gimple_set_bb (call_cond_stmt, new_bb);
33530 gimple_seq_add_stmt (&gseq, call_cond_stmt);
33532 predicate_chain = TREE_CHAIN (predicate_chain);
33534 if (and_expr_var == NULL)
33535 and_expr_var = cond_var;
33536 else
33538 gimple assign_stmt;
33539 /* Use MIN_EXPR to check whether any of the integers is zero, i.e.
33540 and_expr_var = MIN_EXPR <cond_var, and_expr_var>.  */
33541 assign_stmt = gimple_build_assign (and_expr_var,
33542 build2 (MIN_EXPR, integer_type_node,
33543 cond_var, and_expr_var));
33545 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
33546 gimple_set_bb (assign_stmt, new_bb);
33547 gimple_seq_add_stmt (&gseq, assign_stmt);
33551 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
33552 integer_zero_node,
33553 NULL_TREE, NULL_TREE);
33554 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
33555 gimple_set_bb (if_else_stmt, new_bb);
33556 gimple_seq_add_stmt (&gseq, if_else_stmt);
33558 gimple_seq_add_stmt (&gseq, convert_stmt);
33559 gimple_seq_add_stmt (&gseq, return_stmt);
33560 set_bb_seq (new_bb, gseq);
33562 bb1 = new_bb;
33563 e12 = split_block (bb1, if_else_stmt);
33564 bb2 = e12->dest;
33565 e12->flags &= ~EDGE_FALLTHRU;
33566 e12->flags |= EDGE_TRUE_VALUE;
33568 e23 = split_block (bb2, return_stmt);
33570 gimple_set_bb (convert_stmt, bb2);
33571 gimple_set_bb (return_stmt, bb2);
33573 bb3 = e23->dest;
33574 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
33576 remove_edge (e23);
33577 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
33579 pop_cfun ();
33581 return bb3;
33584 /* This parses the attribute arguments to target in DECL and determines
33585 the right builtin to use to match the platform specification.
33586 It returns the priority value for this version decl. If PREDICATE_LIST
33587 is not NULL, it stores the list of cpu features that need to be checked
33588 before dispatching this function. */
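/* For example (illustrative): a version declared with
   __attribute__ ((target ("arch=core2"))) gets priority P_PROC_SSSE3 and,
   when PREDICATE_LIST is non-null, a predicate chain containing
   __builtin_cpu_is ("core2"); a version declared with
   __attribute__ ((target ("avx2"))) gets priority P_AVX2 and a chain
   containing __builtin_cpu_supports ("avx2").  */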
33590 static unsigned int
33591 get_builtin_code_for_version (tree decl, tree *predicate_list)
33593 tree attrs;
33594 struct cl_target_option cur_target;
33595 tree target_node;
33596 struct cl_target_option *new_target;
33597 const char *arg_str = NULL;
33598 const char *attrs_str = NULL;
33599 char *tok_str = NULL;
33600 char *token;
33602 /* Priority of i386 features, greater value is higher priority. This is
33603 used to decide the order in which function dispatch must happen. For
33604 instance, a version specialized for SSE4.2 should be checked for dispatch
33605 before a version for SSE3, as SSE4.2 implies SSE3. */
33606 enum feature_priority
33608 P_ZERO = 0,
33609 P_MMX,
33610 P_SSE,
33611 P_SSE2,
33612 P_SSE3,
33613 P_SSSE3,
33614 P_PROC_SSSE3,
33615 P_SSE4_A,
33616 P_PROC_SSE4_A,
33617 P_SSE4_1,
33618 P_SSE4_2,
33619 P_PROC_SSE4_2,
33620 P_POPCNT,
33621 P_AVX,
33622 P_PROC_AVX,
33623 P_FMA4,
33624 P_XOP,
33625 P_PROC_XOP,
33626 P_FMA,
33627 P_PROC_FMA,
33628 P_AVX2,
33629 P_PROC_AVX2
33632 enum feature_priority priority = P_ZERO;
33634 /* These are the target attribute strings for which a dispatcher is
33635 available, from fold_builtin_cpu. */
33637 static struct _feature_list
33639 const char *const name;
33640 const enum feature_priority priority;
33642 const feature_list[] =
33644 {"mmx", P_MMX},
33645 {"sse", P_SSE},
33646 {"sse2", P_SSE2},
33647 {"sse3", P_SSE3},
33648 {"sse4a", P_SSE4_A},
33649 {"ssse3", P_SSSE3},
33650 {"sse4.1", P_SSE4_1},
33651 {"sse4.2", P_SSE4_2},
33652 {"popcnt", P_POPCNT},
33653 {"avx", P_AVX},
33654 {"fma4", P_FMA4},
33655 {"xop", P_XOP},
33656 {"fma", P_FMA},
33657 {"avx2", P_AVX2}
33661 static unsigned int NUM_FEATURES
33662 = sizeof (feature_list) / sizeof (struct _feature_list);
33664 unsigned int i;
33666 tree predicate_chain = NULL_TREE;
33667 tree predicate_decl, predicate_arg;
33669 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
33670 gcc_assert (attrs != NULL);
33672 attrs = TREE_VALUE (TREE_VALUE (attrs));
33674 gcc_assert (TREE_CODE (attrs) == STRING_CST);
33675 attrs_str = TREE_STRING_POINTER (attrs);
33677 /* Return priority zero for default function. */
33678 if (strcmp (attrs_str, "default") == 0)
33679 return 0;
33681 /* Handle arch= if specified. For priority, set it to be 1 more than
33682 the best instruction set the processor can handle. For instance, if
33683 there is a version for atom and a version for ssse3 (the highest ISA
33684 priority for atom), the atom version must be checked for dispatch
33685 before the ssse3 version. */
33686 if (strstr (attrs_str, "arch=") != NULL)
33688 cl_target_option_save (&cur_target, &global_options);
33689 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
33690 &global_options_set);
33692 gcc_assert (target_node);
33693 new_target = TREE_TARGET_OPTION (target_node);
33694 gcc_assert (new_target);
33696 if (new_target->arch_specified && new_target->arch > 0)
33698 switch (new_target->arch)
33700 case PROCESSOR_CORE2:
33701 arg_str = "core2";
33702 priority = P_PROC_SSSE3;
33703 break;
33704 case PROCESSOR_NEHALEM:
33705 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
33706 arg_str = "westmere";
33707 else
33708 /* We translate "arch=corei7" and "arch=nehalem" to
33709 "corei7" so that it will be mapped to M_INTEL_COREI7
33710 as cpu type to cover all M_INTEL_COREI7_XXXs. */
33711 arg_str = "corei7";
33712 priority = P_PROC_SSE4_2;
33713 break;
33714 case PROCESSOR_SANDYBRIDGE:
33715 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
33716 arg_str = "ivybridge";
33717 else
33718 arg_str = "sandybridge";
33719 priority = P_PROC_AVX;
33720 break;
33721 case PROCESSOR_HASWELL:
33722 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
33723 arg_str = "broadwell";
33724 else
33725 arg_str = "haswell";
33726 priority = P_PROC_AVX2;
33727 break;
33728 case PROCESSOR_BONNELL:
33729 arg_str = "bonnell";
33730 priority = P_PROC_SSSE3;
33731 break;
33732 case PROCESSOR_SILVERMONT:
33733 arg_str = "silvermont";
33734 priority = P_PROC_SSE4_2;
33735 break;
33736 case PROCESSOR_AMDFAM10:
33737 arg_str = "amdfam10h";
33738 priority = P_PROC_SSE4_A;
33739 break;
33740 case PROCESSOR_BTVER1:
33741 arg_str = "btver1";
33742 priority = P_PROC_SSE4_A;
33743 break;
33744 case PROCESSOR_BTVER2:
33745 arg_str = "btver2";
33746 priority = P_PROC_AVX;
33747 break;
33748 case PROCESSOR_BDVER1:
33749 arg_str = "bdver1";
33750 priority = P_PROC_XOP;
33751 break;
33752 case PROCESSOR_BDVER2:
33753 arg_str = "bdver2";
33754 priority = P_PROC_FMA;
33755 break;
33756 case PROCESSOR_BDVER3:
33757 arg_str = "bdver3";
33758 priority = P_PROC_FMA;
33759 break;
33760 case PROCESSOR_BDVER4:
33761 arg_str = "bdver4";
33762 priority = P_PROC_AVX2;
33763 break;
33767 cl_target_option_restore (&global_options, &cur_target);
33769 if (predicate_list && arg_str == NULL)
33771 error_at (DECL_SOURCE_LOCATION (decl),
33772 "No dispatcher found for the versioning attributes");
33773 return 0;
33776 if (predicate_list)
33778 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
33779 /* For a C string literal the length includes the trailing NULL. */
33780 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
33781 predicate_chain = tree_cons (predicate_decl, predicate_arg,
33782 predicate_chain);
33786 /* Process feature name. */
33787 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
33788 strcpy (tok_str, attrs_str);
33789 token = strtok (tok_str, ",");
33790 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
33792 while (token != NULL)
33794 /* Do not process "arch=" */
33795 if (strncmp (token, "arch=", 5) == 0)
33797 token = strtok (NULL, ",");
33798 continue;
33800 for (i = 0; i < NUM_FEATURES; ++i)
33802 if (strcmp (token, feature_list[i].name) == 0)
33804 if (predicate_list)
33806 predicate_arg = build_string_literal (
33807 strlen (feature_list[i].name) + 1,
33808 feature_list[i].name);
33809 predicate_chain = tree_cons (predicate_decl, predicate_arg,
33810 predicate_chain);
33812 /* Find the maximum priority feature. */
33813 if (feature_list[i].priority > priority)
33814 priority = feature_list[i].priority;
33816 break;
33819 if (predicate_list && i == NUM_FEATURES)
33821 error_at (DECL_SOURCE_LOCATION (decl),
33822 "No dispatcher found for %s", token);
33823 return 0;
33825 token = strtok (NULL, ",");
33827 free (tok_str);
33829 if (predicate_list && predicate_chain == NULL_TREE)
33831 error_at (DECL_SOURCE_LOCATION (decl),
33832 "No dispatcher found for the versioning attributes : %s",
33833 attrs_str);
33834 return 0;
33836 else if (predicate_list)
33838 predicate_chain = nreverse (predicate_chain);
33839 *predicate_list = predicate_chain;
33842 return priority;
33845 /* This compares the priority of target features in function DECL1
33846 and DECL2. It returns positive value if DECL1 is higher priority,
33847 negative value if DECL2 is higher priority and 0 if they are the
33848 same. */
33850 static int
33851 ix86_compare_version_priority (tree decl1, tree decl2)
33853 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
33854 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
33856 return (int)priority1 - (int)priority2;
33859 /* V1 and V2 point to function versions with different priorities
33860 based on the target ISA. This function compares their priorities. */
33862 static int
33863 feature_compare (const void *v1, const void *v2)
33865 typedef struct _function_version_info
33867 tree version_decl;
33868 tree predicate_chain;
33869 unsigned int dispatch_priority;
33870 } function_version_info;
33872 const function_version_info c1 = *(const function_version_info *)v1;
33873 const function_version_info c2 = *(const function_version_info *)v2;
33874 return (c2.dispatch_priority - c1.dispatch_priority);
33877 /* This function generates the dispatch function for
33878 multi-versioned functions.  DISPATCH_DECL is the function which will
33879 contain the dispatch logic.  FNDECLS holds the function choices for
33880 dispatch, passed as a vector of decls with the default version first.
33881 EMPTY_BB is the basic block in DISPATCH_DECL in which the dispatch code is generated. */
33883 static int
33884 dispatch_function_versions (tree dispatch_decl,
33885 void *fndecls_p,
33886 basic_block *empty_bb)
33888 tree default_decl;
33889 gimple ifunc_cpu_init_stmt;
33890 gimple_seq gseq;
33891 int ix;
33892 tree ele;
33893 vec<tree> *fndecls;
33894 unsigned int num_versions = 0;
33895 unsigned int actual_versions = 0;
33896 unsigned int i;
33898 struct _function_version_info
33900 tree version_decl;
33901 tree predicate_chain;
33902 unsigned int dispatch_priority;
33903 } *function_version_info;
33905 gcc_assert (dispatch_decl != NULL
33906 && fndecls_p != NULL
33907 && empty_bb != NULL);
33909 /* fndecls_p is actually a vector. */
33910 fndecls = static_cast<vec<tree> *> (fndecls_p);
33912 /* At least one more version other than the default. */
33913 num_versions = fndecls->length ();
33914 gcc_assert (num_versions >= 2);
33916 function_version_info = (struct _function_version_info *)
33917 XNEWVEC (struct _function_version_info, (num_versions - 1));
33919 /* The first version in the vector is the default decl. */
33920 default_decl = (*fndecls)[0];
33922 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
33924 gseq = bb_seq (*empty_bb);
33925 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
33926 constructors, so explicitly call __builtin_cpu_init here. */
33927 ifunc_cpu_init_stmt = gimple_build_call_vec (
33928 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
33929 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
33930 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
33931 set_bb_seq (*empty_bb, gseq);
33933 pop_cfun ();
33936 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
33938 tree version_decl = ele;
33939 tree predicate_chain = NULL_TREE;
33940 unsigned int priority;
33941 /* Get attribute string, parse it and find the right predicate decl.
33942 The predicate function could be a lengthy combination of many
33943 features, like arch-type and various isa-variants. */
33944 priority = get_builtin_code_for_version (version_decl,
33945 &predicate_chain);
33947 if (predicate_chain == NULL_TREE)
33948 continue;
33950 function_version_info [actual_versions].version_decl = version_decl;
33951 function_version_info [actual_versions].predicate_chain
33952 = predicate_chain;
33953 function_version_info [actual_versions].dispatch_priority = priority;
33954 actual_versions++;
33957 /* Sort the versions according to descending order of dispatch priority. The
33958 priority is based on the ISA. This is not a perfect solution. There
33959 could still be ambiguity. If more than one function version is suitable
33960 to execute, which one should be dispatched? In future, allow the user
33961 to specify a dispatch priority next to the version. */
33962 qsort (function_version_info, actual_versions,
33963 sizeof (struct _function_version_info), feature_compare);
33965 for (i = 0; i < actual_versions; ++i)
33966 *empty_bb = add_condition_to_bb (dispatch_decl,
33967 function_version_info[i].version_decl,
33968 function_version_info[i].predicate_chain,
33969 *empty_bb);
33971 /* dispatch default version at the end. */
33972 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
33973 NULL, *empty_bb);
33975 free (function_version_info);
33976 return 0;
33979 /* Comparator function to be used in the qsort routine to sort the
33980 attribute specification strings of the "target" attribute. */
33982 static int
33983 attr_strcmp (const void *v1, const void *v2)
33985 const char *c1 = *(char *const*)v1;
33986 const char *c2 = *(char *const*)v2;
33987 return strcmp (c1, c2);
33990 /* ARGLIST is the argument to target attribute. This function tokenizes
33991 the comma separated arguments, sorts them and returns a string which
33992 is a unique identifier for the comma separated arguments. It also
33993 replaces non-identifier characters "=,-" with "_". */
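/* For example (illustrative): the argument lists "avx,arch=core2" and
   "arch=core2,avx" both produce the string "arch_core2_avx", so the two
   spellings are recognized as the same version.  */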
33995 static char *
33996 sorted_attr_string (tree arglist)
33998 tree arg;
33999 size_t str_len_sum = 0;
34000 char **args = NULL;
34001 char *attr_str, *ret_str;
34002 char *attr = NULL;
34003 unsigned int argnum = 1;
34004 unsigned int i;
34006 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34008 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34009 size_t len = strlen (str);
34010 str_len_sum += len + 1;
34011 if (arg != arglist)
34012 argnum++;
34013 for (i = 0; i < strlen (str); i++)
34014 if (str[i] == ',')
34015 argnum++;
34018 attr_str = XNEWVEC (char, str_len_sum);
34019 str_len_sum = 0;
34020 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
34022 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
34023 size_t len = strlen (str);
34024 memcpy (attr_str + str_len_sum, str, len);
34025 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
34026 str_len_sum += len + 1;
34029 /* Replace "=,-" with "_". */
34030 for (i = 0; i < strlen (attr_str); i++)
34031 if (attr_str[i] == '=' || attr_str[i]== '-')
34032 attr_str[i] = '_';
34034 if (argnum == 1)
34035 return attr_str;
34037 args = XNEWVEC (char *, argnum);
34039 i = 0;
34040 attr = strtok (attr_str, ",");
34041 while (attr != NULL)
34043 args[i] = attr;
34044 i++;
34045 attr = strtok (NULL, ",");
34048 qsort (args, argnum, sizeof (char *), attr_strcmp);
34050 ret_str = XNEWVEC (char, str_len_sum);
34051 str_len_sum = 0;
34052 for (i = 0; i < argnum; i++)
34054 size_t len = strlen (args[i]);
34055 memcpy (ret_str + str_len_sum, args[i], len);
34056 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
34057 str_len_sum += len + 1;
34060 XDELETEVEC (args);
34061 XDELETEVEC (attr_str);
34062 return ret_str;
34065 /* This function changes the assembler name for functions that are
34066 versions. If DECL is a function version and has a "target"
34067 attribute, it appends the attribute string to its assembler name. */
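/* For example (illustrative): a version of foo declared with
   __attribute__ ((target ("sse4.2"))) is assembled as "foo.sse4.2", while
   target ("avx,popcnt") yields "foo.avx_popcnt"; the "default" version
   keeps its original assembler name.  */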
34069 static tree
34070 ix86_mangle_function_version_assembler_name (tree decl, tree id)
34072 tree version_attr;
34073 const char *orig_name, *version_string;
34074 char *attr_str, *assembler_name;
34076 if (DECL_DECLARED_INLINE_P (decl)
34077 && lookup_attribute ("gnu_inline",
34078 DECL_ATTRIBUTES (decl)))
34079 error_at (DECL_SOURCE_LOCATION (decl),
34080 "Function versions cannot be marked as gnu_inline,"
34081 " bodies have to be generated");
34083 if (DECL_VIRTUAL_P (decl)
34084 || DECL_VINDEX (decl))
34085 sorry ("Virtual function multiversioning not supported");
34087 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34089 /* target attribute string cannot be NULL. */
34090 gcc_assert (version_attr != NULL_TREE);
34092 orig_name = IDENTIFIER_POINTER (id);
34093 version_string
34094 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
34096 if (strcmp (version_string, "default") == 0)
34097 return id;
34099 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
34100 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
34102 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
34104 /* Allow assembler name to be modified if already set. */
34105 if (DECL_ASSEMBLER_NAME_SET_P (decl))
34106 SET_DECL_RTL (decl, NULL);
34108 tree ret = get_identifier (assembler_name);
34109 XDELETEVEC (attr_str);
34110 XDELETEVEC (assembler_name);
34111 return ret;
34114 /* This function returns true if FN1 and FN2 are versions of the same function,
34115 that is, the target strings of the function decls are different. This assumes
34116 that FN1 and FN2 have the same signature. */
34118 static bool
34119 ix86_function_versions (tree fn1, tree fn2)
34121 tree attr1, attr2;
34122 char *target1, *target2;
34123 bool result;
34125 if (TREE_CODE (fn1) != FUNCTION_DECL
34126 || TREE_CODE (fn2) != FUNCTION_DECL)
34127 return false;
34129 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
34130 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
34132 /* At least one function decl should have the target attribute specified. */
34133 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
34134 return false;
34136 /* Diagnose missing target attribute if one of the decls is already
34137 multi-versioned. */
34138 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
34140 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
34142 if (attr2 != NULL_TREE)
34144 tree tem = fn1;
34145 fn1 = fn2;
34146 fn2 = tem;
34147 attr1 = attr2;
34149 error_at (DECL_SOURCE_LOCATION (fn2),
34150 "missing %<target%> attribute for multi-versioned %D",
34151 fn2);
34152 inform (DECL_SOURCE_LOCATION (fn1),
34153 "previous declaration of %D", fn1);
34154 /* Prevent diagnosing of the same error multiple times. */
34155 DECL_ATTRIBUTES (fn2)
34156 = tree_cons (get_identifier ("target"),
34157 copy_node (TREE_VALUE (attr1)),
34158 DECL_ATTRIBUTES (fn2));
34160 return false;
34163 target1 = sorted_attr_string (TREE_VALUE (attr1));
34164 target2 = sorted_attr_string (TREE_VALUE (attr2));
34166 /* The sorted target strings must be different for fn1 and fn2
34167 to be versions. */
34168 if (strcmp (target1, target2) == 0)
34169 result = false;
34170 else
34171 result = true;
34173 XDELETEVEC (target1);
34174 XDELETEVEC (target2);
34176 return result;
34179 static tree
34180 ix86_mangle_decl_assembler_name (tree decl, tree id)
34182 /* For function version, add the target suffix to the assembler name. */
34183 if (TREE_CODE (decl) == FUNCTION_DECL
34184 && DECL_FUNCTION_VERSIONED (decl))
34185 id = ix86_mangle_function_version_assembler_name (decl, id);
34186 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
34187 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
34188 #endif
34190 return id;
34193 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
34194 is true, append the full path name of the source file. */
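/* For example (illustrative): make_name (foo_decl, "resolver", false)
   yields "foo.resolver"; with MAKE_UNIQUE true, a file-specific component
   from get_file_function_name is inserted between the two, giving
   something of the form "foo.<file-id>.resolver".  */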
34196 static char *
34197 make_name (tree decl, const char *suffix, bool make_unique)
34199 char *global_var_name;
34200 int name_len;
34201 const char *name;
34202 const char *unique_name = NULL;
34204 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34206 /* Get a unique name that can be used globally without any chances
34207 of collision at link time. */
34208 if (make_unique)
34209 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
34211 name_len = strlen (name) + strlen (suffix) + 2;
34213 if (make_unique)
34214 name_len += strlen (unique_name) + 1;
34215 global_var_name = XNEWVEC (char, name_len);
34217 /* Use '.' to concatenate names as it is demangler friendly. */
34218 if (make_unique)
34219 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
34220 suffix);
34221 else
34222 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
34224 return global_var_name;
34227 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34229 /* Make a dispatcher declaration for the multi-versioned function DECL.
34230 Calls to DECL function will be replaced with calls to the dispatcher
34231 by the front-end. Return the decl created. */
34233 static tree
34234 make_dispatcher_decl (const tree decl)
34236 tree func_decl;
34237 char *func_name;
34238 tree fn_type, func_type;
34239 bool is_uniq = false;
34241 if (TREE_PUBLIC (decl) == 0)
34242 is_uniq = true;
34244 func_name = make_name (decl, "ifunc", is_uniq);
34246 fn_type = TREE_TYPE (decl);
34247 func_type = build_function_type (TREE_TYPE (fn_type),
34248 TYPE_ARG_TYPES (fn_type));
34250 func_decl = build_fn_decl (func_name, func_type);
34251 XDELETEVEC (func_name);
34252 TREE_USED (func_decl) = 1;
34253 DECL_CONTEXT (func_decl) = NULL_TREE;
34254 DECL_INITIAL (func_decl) = error_mark_node;
34255 DECL_ARTIFICIAL (func_decl) = 1;
34256 /* Mark this func as external, the resolver will flip it again if
34257 it gets generated. */
34258 DECL_EXTERNAL (func_decl) = 1;
34259 /* IFUNCs have to be externally visible, so make the decl public. */
34260 TREE_PUBLIC (func_decl) = 1;
34262 return func_decl;
34265 #endif
34267 /* Returns true if DECL is multi-versioned and is the default version,
34268 that is, it is not tagged with a target-specific "target" attribute. */
34270 static bool
34271 is_function_default_version (const tree decl)
34273 if (TREE_CODE (decl) != FUNCTION_DECL
34274 || !DECL_FUNCTION_VERSIONED (decl))
34275 return false;
34276 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34277 gcc_assert (attr);
34278 attr = TREE_VALUE (TREE_VALUE (attr));
34279 return (TREE_CODE (attr) == STRING_CST
34280 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
34283 /* Make a dispatcher declaration for the multi-versioned function DECL.
34284 Calls to DECL function will be replaced with calls to the dispatcher
34285 by the front-end. Returns the decl of the dispatcher function. */
34287 static tree
34288 ix86_get_function_versions_dispatcher (void *decl)
34290 tree fn = (tree) decl;
34291 struct cgraph_node *node = NULL;
34292 struct cgraph_node *default_node = NULL;
34293 struct cgraph_function_version_info *node_v = NULL;
34294 struct cgraph_function_version_info *first_v = NULL;
34296 tree dispatch_decl = NULL;
34298 struct cgraph_function_version_info *default_version_info = NULL;
34300 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
34302 node = cgraph_get_node (fn);
34303 gcc_assert (node != NULL);
34305 node_v = get_cgraph_node_version (node);
34306 gcc_assert (node_v != NULL);
34308 if (node_v->dispatcher_resolver != NULL)
34309 return node_v->dispatcher_resolver;
34311 /* Find the default version and make it the first node. */
34312 first_v = node_v;
34313 /* Go to the beginning of the chain. */
34314 while (first_v->prev != NULL)
34315 first_v = first_v->prev;
34316 default_version_info = first_v;
34317 while (default_version_info != NULL)
34319 if (is_function_default_version
34320 (default_version_info->this_node->decl))
34321 break;
34322 default_version_info = default_version_info->next;
34325 /* If there is no default node, just return NULL. */
34326 if (default_version_info == NULL)
34327 return NULL;
34329 /* Make default info the first node. */
34330 if (first_v != default_version_info)
34332 default_version_info->prev->next = default_version_info->next;
34333 if (default_version_info->next)
34334 default_version_info->next->prev = default_version_info->prev;
34335 first_v->prev = default_version_info;
34336 default_version_info->next = first_v;
34337 default_version_info->prev = NULL;
34340 default_node = default_version_info->this_node;
34342 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
34343 if (targetm.has_ifunc_p ())
34345 struct cgraph_function_version_info *it_v = NULL;
34346 struct cgraph_node *dispatcher_node = NULL;
34347 struct cgraph_function_version_info *dispatcher_version_info = NULL;
34349 /* Right now, the dispatching is done via ifunc. */
34350 dispatch_decl = make_dispatcher_decl (default_node->decl);
34352 dispatcher_node = cgraph_get_create_node (dispatch_decl);
34353 gcc_assert (dispatcher_node != NULL);
34354 dispatcher_node->dispatcher_function = 1;
34355 dispatcher_version_info
34356 = insert_new_cgraph_node_version (dispatcher_node);
34357 dispatcher_version_info->next = default_version_info;
34358 dispatcher_node->definition = 1;
34360 /* Set the dispatcher for all the versions. */
34361 it_v = default_version_info;
34362 while (it_v != NULL)
34364 it_v->dispatcher_resolver = dispatch_decl;
34365 it_v = it_v->next;
34368 else
34369 #endif
34371 error_at (DECL_SOURCE_LOCATION (default_node->decl),
34372 "multiversioning needs ifunc which is not supported "
34373 "on this target");
34376 return dispatch_decl;
34379 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
34380 it to CHAIN. */
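/* For example: make_attribute ("ifunc", "foo.resolver", NULL_TREE) builds
   the tree for __attribute__ ((ifunc ("foo.resolver"))), which is how
   make_resolver_func below ties the dispatcher to its resolver.  */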
34382 static tree
34383 make_attribute (const char *name, const char *arg_name, tree chain)
34385 tree attr_name;
34386 tree attr_arg_name;
34387 tree attr_args;
34388 tree attr;
34390 attr_name = get_identifier (name);
34391 attr_arg_name = build_string (strlen (arg_name), arg_name);
34392 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
34393 attr = tree_cons (attr_name, attr_args, chain);
34394 return attr;
34397 /* Make the resolver function decl to dispatch the versions of
34398 a multi-versioned function, DEFAULT_DECL. Create an
34399 empty basic block in the resolver and store the pointer in
34400 EMPTY_BB. Return the decl of the resolver function. */
34402 static tree
34403 make_resolver_func (const tree default_decl,
34404 const tree dispatch_decl,
34405 basic_block *empty_bb)
34407 char *resolver_name;
34408 tree decl, type, decl_name, t;
34409 bool is_uniq = false;
34411 /* IFUNCs have to be globally visible. So, if the default_decl is
34412 not, then the name of the IFUNC should be made unique. */
34413 if (TREE_PUBLIC (default_decl) == 0)
34414 is_uniq = true;
34416 /* Append the filename to the resolver function if the versions are
34417 not externally visible. This is because the resolver function has
34418 to be externally visible for the loader to find it. So, appending
34419 the filename will prevent conflicts with a resolver function from
34420 another module which is based on the same version name. */
34421 resolver_name = make_name (default_decl, "resolver", is_uniq);
34423 /* The resolver function should return a (void *). */
34424 type = build_function_type_list (ptr_type_node, NULL_TREE);
34426 decl = build_fn_decl (resolver_name, type);
34427 decl_name = get_identifier (resolver_name);
34428 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
34430 DECL_NAME (decl) = decl_name;
34431 TREE_USED (decl) = 1;
34432 DECL_ARTIFICIAL (decl) = 1;
34433 DECL_IGNORED_P (decl) = 0;
34434 /* IFUNC resolvers have to be externally visible. */
34435 TREE_PUBLIC (decl) = 1;
34436 DECL_UNINLINABLE (decl) = 1;
34438 /* Resolver is not external, body is generated. */
34439 DECL_EXTERNAL (decl) = 0;
34440 DECL_EXTERNAL (dispatch_decl) = 0;
34442 DECL_CONTEXT (decl) = NULL_TREE;
34443 DECL_INITIAL (decl) = make_node (BLOCK);
34444 DECL_STATIC_CONSTRUCTOR (decl) = 0;
34446 if (DECL_COMDAT_GROUP (default_decl)
34447 || TREE_PUBLIC (default_decl))
34449 /* In this case, each translation unit with a call to this
34450 versioned function will put out a resolver. Ensure it
34451 is comdat to keep just one copy. */
34452 DECL_COMDAT (decl) = 1;
34453 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
34455 /* Build result decl and add to function_decl. */
34456 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
34457 DECL_ARTIFICIAL (t) = 1;
34458 DECL_IGNORED_P (t) = 1;
34459 DECL_RESULT (decl) = t;
34461 gimplify_function_tree (decl);
34462 push_cfun (DECL_STRUCT_FUNCTION (decl));
34463 *empty_bb = init_lowered_empty_function (decl, false);
34465 cgraph_add_new_function (decl, true);
34466 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
34468 pop_cfun ();
34470 gcc_assert (dispatch_decl != NULL);
34471 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
34472 DECL_ATTRIBUTES (dispatch_decl)
34473 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
34475 /* Create the alias for dispatch to resolver here. */
34476 /*cgraph_create_function_alias (dispatch_decl, decl);*/
34477 cgraph_same_body_alias (NULL, dispatch_decl, decl);
34478 XDELETEVEC (resolver_name);
34479 return decl;
34482 /* Generate the dispatching code body to dispatch multi-versioned function
34483 DECL. The target hook is called to process the "target" attributes and
34484 provide the code to dispatch the right function at run-time. NODE points
34485 to the dispatcher decl whose body will be created. */
34487 static tree
34488 ix86_generate_version_dispatcher_body (void *node_p)
34490 tree resolver_decl;
34491 basic_block empty_bb;
34492 tree default_ver_decl;
34493 struct cgraph_node *versn;
34494 struct cgraph_node *node;
34496 struct cgraph_function_version_info *node_version_info = NULL;
34497 struct cgraph_function_version_info *versn_info = NULL;
34499 node = (cgraph_node *)node_p;
34501 node_version_info = get_cgraph_node_version (node);
34502 gcc_assert (node->dispatcher_function
34503 && node_version_info != NULL);
34505 if (node_version_info->dispatcher_resolver)
34506 return node_version_info->dispatcher_resolver;
34508 /* The first version in the chain corresponds to the default version. */
34509 default_ver_decl = node_version_info->next->this_node->decl;
34511 /* node is going to be an alias, so remove the finalized bit. */
34512 node->definition = false;
34514 resolver_decl = make_resolver_func (default_ver_decl,
34515 node->decl, &empty_bb);
34517 node_version_info->dispatcher_resolver = resolver_decl;
34519 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
34521 auto_vec<tree, 2> fn_ver_vec;
34523 for (versn_info = node_version_info->next; versn_info;
34524 versn_info = versn_info->next)
34526 versn = versn_info->this_node;
34527 /* Check for virtual functions here again, as by this time it should
34528 have been determined if this function needs a vtable index or
34529 not. This happens for methods in derived classes that override
34530 virtual methods in base classes but are not explicitly marked as
34531 virtual. */
34532 if (DECL_VINDEX (versn->decl))
34533 sorry ("Virtual function multiversioning not supported");
34535 fn_ver_vec.safe_push (versn->decl);
34538 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
34539 rebuild_cgraph_edges ();
34540 pop_cfun ();
34541 return resolver_decl;
34543 /* This builds the processor_model struct type defined in
34544 libgcc/config/i386/cpuinfo.c */
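/* A sketch of the struct this type is kept in sync with (assuming the
   cpuinfo.c definition of this era):

	struct __processor_model
	{
	  unsigned int __cpu_vendor;
	  unsigned int __cpu_type;
	  unsigned int __cpu_subtype;
	  unsigned int __cpu_features[1];
	} __cpu_model;  */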
34546 static tree
34547 build_processor_model_struct (void)
34549 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
34550 "__cpu_features"};
34551 tree field = NULL_TREE, field_chain = NULL_TREE;
34552 int i;
34553 tree type = make_node (RECORD_TYPE);
34555 /* The first 3 fields are unsigned int. */
34556 for (i = 0; i < 3; ++i)
34558 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
34559 get_identifier (field_name[i]), unsigned_type_node);
34560 if (field_chain != NULL_TREE)
34561 DECL_CHAIN (field) = field_chain;
34562 field_chain = field;
34565 /* The last field is an array of unsigned integers of size one. */
34566 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
34567 get_identifier (field_name[3]),
34568 build_array_type (unsigned_type_node,
34569 build_index_type (size_one_node)));
34570 if (field_chain != NULL_TREE)
34571 DECL_CHAIN (field) = field_chain;
34572 field_chain = field;
34574 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
34575 return type;
34578 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
34580 static tree
34581 make_var_decl (tree type, const char *name)
34583 tree new_decl;
34585 new_decl = build_decl (UNKNOWN_LOCATION,
34586 VAR_DECL,
34587 get_identifier(name),
34588 type);
34590 DECL_EXTERNAL (new_decl) = 1;
34591 TREE_STATIC (new_decl) = 1;
34592 TREE_PUBLIC (new_decl) = 1;
34593 DECL_INITIAL (new_decl) = 0;
34594 DECL_ARTIFICIAL (new_decl) = 0;
34595 DECL_PRESERVE_P (new_decl) = 1;
34597 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
34598 assemble_variable (new_decl, 0, 0, 0);
34600 return new_decl;
34603 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
34604 into an integer defined in libgcc/config/i386/cpuinfo.c */
34606 static tree
34607 fold_builtin_cpu (tree fndecl, tree *args)
34609 unsigned int i;
34610 enum ix86_builtins fn_code = (enum ix86_builtins)
34611 DECL_FUNCTION_CODE (fndecl);
34612 tree param_string_cst = NULL;
34614 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
34615 enum processor_features
34617 F_CMOV = 0,
34618 F_MMX,
34619 F_POPCNT,
34620 F_SSE,
34621 F_SSE2,
34622 F_SSE3,
34623 F_SSSE3,
34624 F_SSE4_1,
34625 F_SSE4_2,
34626 F_AVX,
34627 F_AVX2,
34628 F_SSE4_A,
34629 F_FMA4,
34630 F_XOP,
34631 F_FMA,
34632 F_MAX
34635 /* These are the values for vendor types and cpu types and subtypes
34636 in cpuinfo.c. Cpu types and subtypes must have the corresponding
34637 start value subtracted to match the values libgcc stores. */
34638 enum processor_model
34640 M_INTEL = 1,
34641 M_AMD,
34642 M_CPU_TYPE_START,
34643 M_INTEL_BONNELL,
34644 M_INTEL_CORE2,
34645 M_INTEL_COREI7,
34646 M_AMDFAM10H,
34647 M_AMDFAM15H,
34648 M_INTEL_SILVERMONT,
34649 M_AMD_BTVER1,
34650 M_AMD_BTVER2,
34651 M_CPU_SUBTYPE_START,
34652 M_INTEL_COREI7_NEHALEM,
34653 M_INTEL_COREI7_WESTMERE,
34654 M_INTEL_COREI7_SANDYBRIDGE,
34655 M_AMDFAM10H_BARCELONA,
34656 M_AMDFAM10H_SHANGHAI,
34657 M_AMDFAM10H_ISTANBUL,
34658 M_AMDFAM15H_BDVER1,
34659 M_AMDFAM15H_BDVER2,
34660 M_AMDFAM15H_BDVER3,
34661 M_AMDFAM15H_BDVER4,
34662 M_INTEL_COREI7_IVYBRIDGE,
34663 M_INTEL_COREI7_HASWELL
34666 static struct _arch_names_table
34668 const char *const name;
34669 const enum processor_model model;
34671 const arch_names_table[] =
34673 {"amd", M_AMD},
34674 {"intel", M_INTEL},
34675 {"atom", M_INTEL_BONNELL},
34676 {"slm", M_INTEL_SILVERMONT},
34677 {"core2", M_INTEL_CORE2},
34678 {"corei7", M_INTEL_COREI7},
34679 {"nehalem", M_INTEL_COREI7_NEHALEM},
34680 {"westmere", M_INTEL_COREI7_WESTMERE},
34681 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
34682 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
34683 {"haswell", M_INTEL_COREI7_HASWELL},
34684 {"bonnell", M_INTEL_BONNELL},
34685 {"silvermont", M_INTEL_SILVERMONT},
34686 {"amdfam10h", M_AMDFAM10H},
34687 {"barcelona", M_AMDFAM10H_BARCELONA},
34688 {"shanghai", M_AMDFAM10H_SHANGHAI},
34689 {"istanbul", M_AMDFAM10H_ISTANBUL},
34690 {"btver1", M_AMD_BTVER1},
34691 {"amdfam15h", M_AMDFAM15H},
34692 {"bdver1", M_AMDFAM15H_BDVER1},
34693 {"bdver2", M_AMDFAM15H_BDVER2},
34694 {"bdver3", M_AMDFAM15H_BDVER3},
34695 {"bdver4", M_AMDFAM15H_BDVER4},
34696 {"btver2", M_AMD_BTVER2},
34699 static struct _isa_names_table
34701 const char *const name;
34702 const enum processor_features feature;
34704 const isa_names_table[] =
34706 {"cmov", F_CMOV},
34707 {"mmx", F_MMX},
34708 {"popcnt", F_POPCNT},
34709 {"sse", F_SSE},
34710 {"sse2", F_SSE2},
34711 {"sse3", F_SSE3},
34712 {"ssse3", F_SSSE3},
34713 {"sse4a", F_SSE4_A},
34714 {"sse4.1", F_SSE4_1},
34715 {"sse4.2", F_SSE4_2},
34716 {"avx", F_AVX},
34717 {"fma4", F_FMA4},
34718 {"xop", F_XOP},
34719 {"fma", F_FMA},
34720 {"avx2", F_AVX2}
34723 tree __processor_model_type = build_processor_model_struct ();
34724 tree __cpu_model_var = make_var_decl (__processor_model_type,
34725 "__cpu_model");
34728 varpool_add_new_variable (__cpu_model_var);
34730 gcc_assert ((args != NULL) && (*args != NULL));
34732 param_string_cst = *args;
34733 while (param_string_cst
34734 && TREE_CODE (param_string_cst) != STRING_CST)
34736 /* *args must be an expr that can contain other EXPRs leading to a
34737 STRING_CST. */
34738 if (!EXPR_P (param_string_cst))
34740 error ("Parameter to builtin must be a string constant or literal");
34741 return integer_zero_node;
34743 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
34746 gcc_assert (param_string_cst);
34748 if (fn_code == IX86_BUILTIN_CPU_IS)
34750 tree ref;
34751 tree field;
34752 tree final;
34754 unsigned int field_val = 0;
34755 unsigned int NUM_ARCH_NAMES
34756 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
34758 for (i = 0; i < NUM_ARCH_NAMES; i++)
34759 if (strcmp (arch_names_table[i].name,
34760 TREE_STRING_POINTER (param_string_cst)) == 0)
34761 break;
34763 if (i == NUM_ARCH_NAMES)
34765 error ("Parameter to builtin not valid: %s",
34766 TREE_STRING_POINTER (param_string_cst));
34767 return integer_zero_node;
34770 field = TYPE_FIELDS (__processor_model_type);
34771 field_val = arch_names_table[i].model;
34773 /* CPU types are stored in the next field. */
34774 if (field_val > M_CPU_TYPE_START
34775 && field_val < M_CPU_SUBTYPE_START)
34777 field = DECL_CHAIN (field);
34778 field_val -= M_CPU_TYPE_START;
34781 /* CPU subtypes are stored in the next field. */
34782 if (field_val > M_CPU_SUBTYPE_START)
34784 field = DECL_CHAIN (DECL_CHAIN (field));
34785 field_val -= M_CPU_SUBTYPE_START;
34788 /* Get the appropriate field in __cpu_model. */
34789 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
34790 field, NULL_TREE);
34792 /* Check the value. */
34793 final = build2 (EQ_EXPR, unsigned_type_node, ref,
34794 build_int_cstu (unsigned_type_node, field_val));
34795 return build1 (CONVERT_EXPR, integer_type_node, final);
34797 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
34799 tree ref;
34800 tree array_elt;
34801 tree field;
34802 tree final;
34804 unsigned int field_val = 0;
34805 unsigned int NUM_ISA_NAMES
34806 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
34808 for (i = 0; i < NUM_ISA_NAMES; i++)
34809 if (strcmp (isa_names_table[i].name,
34810 TREE_STRING_POINTER (param_string_cst)) == 0)
34811 break;
34813 if (i == NUM_ISA_NAMES)
34815 error ("Parameter to builtin not valid: %s",
34816 TREE_STRING_POINTER (param_string_cst));
34817 return integer_zero_node;
34820 field = TYPE_FIELDS (__processor_model_type);
34821 /* Get the last field, which is __cpu_features. */
34822 while (DECL_CHAIN (field))
34823 field = DECL_CHAIN (field);
34825 /* Get the appropriate field: __cpu_model.__cpu_features */
34826 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
34827 field, NULL_TREE);
34829 /* Access the 0th element of __cpu_features array. */
34830 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
34831 integer_zero_node, NULL_TREE, NULL_TREE);
34833 field_val = (1 << isa_names_table[i].feature);
34834 /* Return __cpu_model.__cpu_features[0] & field_val */
34835 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
34836 build_int_cstu (unsigned_type_node, field_val));
34837 return build1 (CONVERT_EXPR, integer_type_node, final);
34839 gcc_unreachable ();
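/* Illustrative only: conceptually, the folding above turns

     __builtin_cpu_is ("amdfam10h")
       into  (int) (__cpu_model.__cpu_type == M_AMDFAM10H - M_CPU_TYPE_START)

     __builtin_cpu_supports ("avx2")
       into  (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))

   using the enum values defined locally in this function.  */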
34842 static tree
34843 ix86_fold_builtin (tree fndecl, int n_args,
34844 tree *args, bool ignore ATTRIBUTE_UNUSED)
34846 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
34848 enum ix86_builtins fn_code = (enum ix86_builtins)
34849 DECL_FUNCTION_CODE (fndecl);
34850 if (fn_code == IX86_BUILTIN_CPU_IS
34851 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
34853 gcc_assert (n_args == 1);
34854 return fold_builtin_cpu (fndecl, args);
34858 #ifdef SUBTARGET_FOLD_BUILTIN
34859 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
34860 #endif
34862 return NULL_TREE;
34865 /* Make builtins to detect cpu type and features supported. NAME is
34866 the builtin name, CODE is the builtin code, and FTYPE is the function
34867 type of the builtin. */
34869 static void
34870 make_cpu_type_builtin (const char* name, int code,
34871 enum ix86_builtin_func_type ftype, bool is_const)
34873 tree decl;
34874 tree type;
34876 type = ix86_get_builtin_func_type (ftype);
34877 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
34878 NULL, NULL_TREE);
34879 gcc_assert (decl != NULL_TREE);
34880 ix86_builtins[(int) code] = decl;
34881 TREE_READONLY (decl) = is_const;
34884 /* Make builtins to get CPU type and features supported. The created
34885 builtins are:
34887 __builtin_cpu_init (), to detect cpu type and features,
34888 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
34889 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
34892 static void
34893 ix86_init_platform_type_builtins (void)
34895 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
34896 INT_FTYPE_VOID, false);
34897 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
34898 INT_FTYPE_PCCHAR, true);
34899 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
34900 INT_FTYPE_PCCHAR, true);
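/* Typical user-level usage of the builtins created above (illustrative
   sketch, not part of this file):

     int
     pick_kernel (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return 2;
       if (__builtin_cpu_is ("corei7"))
         return 1;
       return 0;
     }

   Per the GCC manual, the explicit __builtin_cpu_init call is only
   required when the checks run before static constructors.  */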
34903 /* Internal method for ix86_init_builtins. */
34905 static void
34906 ix86_init_builtins_va_builtins_abi (void)
34908 tree ms_va_ref, sysv_va_ref;
34909 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
34910 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
34911 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
34912 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
34914 if (!TARGET_64BIT)
34915 return;
34916 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
34917 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
34918 ms_va_ref = build_reference_type (ms_va_list_type_node);
34919 sysv_va_ref =
34920 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
34922 fnvoid_va_end_ms =
34923 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
34924 fnvoid_va_start_ms =
34925 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
34926 fnvoid_va_end_sysv =
34927 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
34928 fnvoid_va_start_sysv =
34929 build_varargs_function_type_list (void_type_node, sysv_va_ref,
34930 NULL_TREE);
34931 fnvoid_va_copy_ms =
34932 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
34933 NULL_TREE);
34934 fnvoid_va_copy_sysv =
34935 build_function_type_list (void_type_node, sysv_va_ref,
34936 sysv_va_ref, NULL_TREE);
34938 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
34939 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
34940 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
34941 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
34942 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
34943 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
34944 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
34945 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
34946 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
34947 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
34948 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
34949 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
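/* Illustrative sketch (assumed usage, not taken from this file): on a
   64-bit SysV target these builtins let an ms_abi function handle
   varargs, e.g.

     __attribute__ ((ms_abi)) int
     first_arg (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       int v = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return v;
     }
*/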
34952 static void
34953 ix86_init_builtin_types (void)
34955 tree float128_type_node, float80_type_node;
34957 /* The __float80 type. */
34958 float80_type_node = long_double_type_node;
34959 if (TYPE_MODE (float80_type_node) != XFmode)
34961 /* The __float80 type. */
34962 float80_type_node = make_node (REAL_TYPE);
34964 TYPE_PRECISION (float80_type_node) = 80;
34965 layout_type (float80_type_node);
34967 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
34969 /* The __float128 type. */
34970 float128_type_node = make_node (REAL_TYPE);
34971 TYPE_PRECISION (float128_type_node) = 128;
34972 layout_type (float128_type_node);
34973 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
34975 /* This macro is built by i386-builtin-types.awk. */
34976 DEFINE_BUILTIN_PRIMITIVE_TYPES;
34979 static void
34980 ix86_init_builtins (void)
34982 tree t;
34984 ix86_init_builtin_types ();
34986 /* Builtins to get CPU type and features. */
34987 ix86_init_platform_type_builtins ();
34989 /* TFmode support builtins. */
34990 def_builtin_const (0, "__builtin_infq",
34991 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
34992 def_builtin_const (0, "__builtin_huge_valq",
34993 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
34995 /* We will expand them to a normal call if SSE isn't available, since
34996 they are used by libgcc. */
34997 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
34998 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
34999 BUILT_IN_MD, "__fabstf2", NULL_TREE);
35000 TREE_READONLY (t) = 1;
35001 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
35003 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
35004 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
35005 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
35006 TREE_READONLY (t) = 1;
35007 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
35009 ix86_init_tm_builtins ();
35010 ix86_init_mmx_sse_builtins ();
35012 if (TARGET_LP64)
35013 ix86_init_builtins_va_builtins_abi ();
35015 #ifdef SUBTARGET_INIT_BUILTINS
35016 SUBTARGET_INIT_BUILTINS;
35017 #endif
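/* Illustrative only: the TFmode builtins registered above support
   user-level __float128 code such as

     __float128 x = __builtin_infq ();
     __float128 y = __builtin_fabsq (x);
     __float128 z = __builtin_copysignq (y, x);

   with __fabstf2 / __copysigntf3 from libgcc used as the fallback when
   SSE is not available.  */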
35020 /* Return the ix86 builtin for CODE. */
35022 static tree
35023 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
35025 if (code >= IX86_BUILTIN_MAX)
35026 return error_mark_node;
35028 return ix86_builtins[code];
35031 /* Errors in the source file can cause expand_expr to return const0_rtx
35032 where we expect a vector. To avoid crashing, use one of the vector
35033 clear instructions. */
35034 static rtx
35035 safe_vector_operand (rtx x, enum machine_mode mode)
35037 if (x == const0_rtx)
35038 x = CONST0_RTX (mode);
35039 return x;
35042 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
35044 static rtx
35045 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
35047 rtx pat;
35048 tree arg0 = CALL_EXPR_ARG (exp, 0);
35049 tree arg1 = CALL_EXPR_ARG (exp, 1);
35050 rtx op0 = expand_normal (arg0);
35051 rtx op1 = expand_normal (arg1);
35052 enum machine_mode tmode = insn_data[icode].operand[0].mode;
35053 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
35054 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
35056 if (VECTOR_MODE_P (mode0))
35057 op0 = safe_vector_operand (op0, mode0);
35058 if (VECTOR_MODE_P (mode1))
35059 op1 = safe_vector_operand (op1, mode1);
35061 if (optimize || !target
35062 || GET_MODE (target) != tmode
35063 || !insn_data[icode].operand[0].predicate (target, tmode))
35064 target = gen_reg_rtx (tmode);
35066 if (GET_MODE (op1) == SImode && mode1 == TImode)
35068 rtx x = gen_reg_rtx (V4SImode);
35069 emit_insn (gen_sse2_loadd (x, op1));
35070 op1 = gen_lowpart (TImode, x);
35073 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35074 op0 = copy_to_mode_reg (mode0, op0);
35075 if (!insn_data[icode].operand[2].predicate (op1, mode1))
35076 op1 = copy_to_mode_reg (mode1, op1);
35078 pat = GEN_FCN (icode) (target, op0, op1);
35079 if (! pat)
35080 return 0;
35082 emit_insn (pat);
35084 return target;
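/* Illustrative only: a typical two-operand builtin expanded here is
   __builtin_ia32_addps, which <xmmintrin.h> wraps roughly as

     extern __inline __m128
     _mm_add_ps (__m128 __A, __m128 __B)
     {
       return (__m128) __builtin_ia32_addps ((__v4sf) __A, (__v4sf) __B);
     }
*/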
35087 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
35089 static rtx
35090 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
35091 enum ix86_builtin_func_type m_type,
35092 enum rtx_code sub_code)
35094 rtx pat;
35095 int i;
35096 int nargs;
35097 bool comparison_p = false;
35098 bool tf_p = false;
35099 bool last_arg_constant = false;
35100 int num_memory = 0;
35101 struct {
35102 rtx op;
35103 enum machine_mode mode;
35104 } args[4];
35106 enum machine_mode tmode = insn_data[icode].operand[0].mode;
35108 switch (m_type)
35110 case MULTI_ARG_4_DF2_DI_I:
35111 case MULTI_ARG_4_DF2_DI_I1:
35112 case MULTI_ARG_4_SF2_SI_I:
35113 case MULTI_ARG_4_SF2_SI_I1:
35114 nargs = 4;
35115 last_arg_constant = true;
35116 break;
35118 case MULTI_ARG_3_SF:
35119 case MULTI_ARG_3_DF:
35120 case MULTI_ARG_3_SF2:
35121 case MULTI_ARG_3_DF2:
35122 case MULTI_ARG_3_DI:
35123 case MULTI_ARG_3_SI:
35124 case MULTI_ARG_3_SI_DI:
35125 case MULTI_ARG_3_HI:
35126 case MULTI_ARG_3_HI_SI:
35127 case MULTI_ARG_3_QI:
35128 case MULTI_ARG_3_DI2:
35129 case MULTI_ARG_3_SI2:
35130 case MULTI_ARG_3_HI2:
35131 case MULTI_ARG_3_QI2:
35132 nargs = 3;
35133 break;
35135 case MULTI_ARG_2_SF:
35136 case MULTI_ARG_2_DF:
35137 case MULTI_ARG_2_DI:
35138 case MULTI_ARG_2_SI:
35139 case MULTI_ARG_2_HI:
35140 case MULTI_ARG_2_QI:
35141 nargs = 2;
35142 break;
35144 case MULTI_ARG_2_DI_IMM:
35145 case MULTI_ARG_2_SI_IMM:
35146 case MULTI_ARG_2_HI_IMM:
35147 case MULTI_ARG_2_QI_IMM:
35148 nargs = 2;
35149 last_arg_constant = true;
35150 break;
35152 case MULTI_ARG_1_SF:
35153 case MULTI_ARG_1_DF:
35154 case MULTI_ARG_1_SF2:
35155 case MULTI_ARG_1_DF2:
35156 case MULTI_ARG_1_DI:
35157 case MULTI_ARG_1_SI:
35158 case MULTI_ARG_1_HI:
35159 case MULTI_ARG_1_QI:
35160 case MULTI_ARG_1_SI_DI:
35161 case MULTI_ARG_1_HI_DI:
35162 case MULTI_ARG_1_HI_SI:
35163 case MULTI_ARG_1_QI_DI:
35164 case MULTI_ARG_1_QI_SI:
35165 case MULTI_ARG_1_QI_HI:
35166 nargs = 1;
35167 break;
35169 case MULTI_ARG_2_DI_CMP:
35170 case MULTI_ARG_2_SI_CMP:
35171 case MULTI_ARG_2_HI_CMP:
35172 case MULTI_ARG_2_QI_CMP:
35173 nargs = 2;
35174 comparison_p = true;
35175 break;
35177 case MULTI_ARG_2_SF_TF:
35178 case MULTI_ARG_2_DF_TF:
35179 case MULTI_ARG_2_DI_TF:
35180 case MULTI_ARG_2_SI_TF:
35181 case MULTI_ARG_2_HI_TF:
35182 case MULTI_ARG_2_QI_TF:
35183 nargs = 2;
35184 tf_p = true;
35185 break;
35187 default:
35188 gcc_unreachable ();
35191 if (optimize || !target
35192 || GET_MODE (target) != tmode
35193 || !insn_data[icode].operand[0].predicate (target, tmode))
35194 target = gen_reg_rtx (tmode);
35196 gcc_assert (nargs <= 4);
35198 for (i = 0; i < nargs; i++)
35200 tree arg = CALL_EXPR_ARG (exp, i);
35201 rtx op = expand_normal (arg);
35202 int adjust = (comparison_p) ? 1 : 0;
35203 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
35205 if (last_arg_constant && i == nargs - 1)
35207 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
35209 enum insn_code new_icode = icode;
35210 switch (icode)
35212 case CODE_FOR_xop_vpermil2v2df3:
35213 case CODE_FOR_xop_vpermil2v4sf3:
35214 case CODE_FOR_xop_vpermil2v4df3:
35215 case CODE_FOR_xop_vpermil2v8sf3:
35216 error ("the last argument must be a 2-bit immediate");
35217 return gen_reg_rtx (tmode);
35218 case CODE_FOR_xop_rotlv2di3:
35219 new_icode = CODE_FOR_rotlv2di3;
35220 goto xop_rotl;
35221 case CODE_FOR_xop_rotlv4si3:
35222 new_icode = CODE_FOR_rotlv4si3;
35223 goto xop_rotl;
35224 case CODE_FOR_xop_rotlv8hi3:
35225 new_icode = CODE_FOR_rotlv8hi3;
35226 goto xop_rotl;
35227 case CODE_FOR_xop_rotlv16qi3:
35228 new_icode = CODE_FOR_rotlv16qi3;
35229 xop_rotl:
35230 if (CONST_INT_P (op))
35232 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
35233 op = GEN_INT (INTVAL (op) & mask);
35234 gcc_checking_assert
35235 (insn_data[icode].operand[i + 1].predicate (op, mode));
35237 else
35239 gcc_checking_assert
35240 (nargs == 2
35241 && insn_data[new_icode].operand[0].mode == tmode
35242 && insn_data[new_icode].operand[1].mode == tmode
35243 && insn_data[new_icode].operand[2].mode == mode
35244 && insn_data[new_icode].operand[0].predicate
35245 == insn_data[icode].operand[0].predicate
35246 && insn_data[new_icode].operand[1].predicate
35247 == insn_data[icode].operand[1].predicate);
35248 icode = new_icode;
35249 goto non_constant;
35251 break;
35252 default:
35253 gcc_unreachable ();
35257 else
35259 non_constant:
35260 if (VECTOR_MODE_P (mode))
35261 op = safe_vector_operand (op, mode);
35263 /* If we aren't optimizing, only allow one memory operand to be
35264 generated. */
35265 if (memory_operand (op, mode))
35266 num_memory++;
35268 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
35270 if (optimize
35271 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
35272 || num_memory > 1)
35273 op = force_reg (mode, op);
35276 args[i].op = op;
35277 args[i].mode = mode;
35280 switch (nargs)
35282 case 1:
35283 pat = GEN_FCN (icode) (target, args[0].op);
35284 break;
35286 case 2:
35287 if (tf_p)
35288 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
35289 GEN_INT ((int)sub_code));
35290 else if (! comparison_p)
35291 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35292 else
35294 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
35295 args[0].op,
35296 args[1].op);
35298 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
35300 break;
35302 case 3:
35303 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35304 break;
35306 case 4:
35307 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
35308 break;
35310 default:
35311 gcc_unreachable ();
35314 if (! pat)
35315 return 0;
35317 emit_insn (pat);
35318 return target;
35321 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
35322 insns with vec_merge. */
35324 static rtx
35325 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
35326 rtx target)
35328 rtx pat;
35329 tree arg0 = CALL_EXPR_ARG (exp, 0);
35330 rtx op1, op0 = expand_normal (arg0);
35331 enum machine_mode tmode = insn_data[icode].operand[0].mode;
35332 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
35334 if (optimize || !target
35335 || GET_MODE (target) != tmode
35336 || !insn_data[icode].operand[0].predicate (target, tmode))
35337 target = gen_reg_rtx (tmode);
35339 if (VECTOR_MODE_P (mode0))
35340 op0 = safe_vector_operand (op0, mode0);
35342 if ((optimize && !register_operand (op0, mode0))
35343 || !insn_data[icode].operand[1].predicate (op0, mode0))
35344 op0 = copy_to_mode_reg (mode0, op0);
35346 op1 = op0;
35347 if (!insn_data[icode].operand[2].predicate (op1, mode0))
35348 op1 = copy_to_mode_reg (mode0, op1);
35350 pat = GEN_FCN (icode) (target, op0, op1);
35351 if (! pat)
35352 return 0;
35353 emit_insn (pat);
35354 return target;
35357 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
35359 static rtx
35360 ix86_expand_sse_compare (const struct builtin_description *d,
35361 tree exp, rtx target, bool swap)
35363 rtx pat;
35364 tree arg0 = CALL_EXPR_ARG (exp, 0);
35365 tree arg1 = CALL_EXPR_ARG (exp, 1);
35366 rtx op0 = expand_normal (arg0);
35367 rtx op1 = expand_normal (arg1);
35368 rtx op2;
35369 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
35370 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35371 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
35372 enum rtx_code comparison = d->comparison;
35374 if (VECTOR_MODE_P (mode0))
35375 op0 = safe_vector_operand (op0, mode0);
35376 if (VECTOR_MODE_P (mode1))
35377 op1 = safe_vector_operand (op1, mode1);
35379 /* Swap operands if we have a comparison that isn't available in
35380 hardware. */
35381 if (swap)
35383 rtx tmp = gen_reg_rtx (mode1);
35384 emit_move_insn (tmp, op1);
35385 op1 = op0;
35386 op0 = tmp;
35389 if (optimize || !target
35390 || GET_MODE (target) != tmode
35391 || !insn_data[d->icode].operand[0].predicate (target, tmode))
35392 target = gen_reg_rtx (tmode);
35394 if ((optimize && !register_operand (op0, mode0))
35395 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
35396 op0 = copy_to_mode_reg (mode0, op0);
35397 if ((optimize && !register_operand (op1, mode1))
35398 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
35399 op1 = copy_to_mode_reg (mode1, op1);
35401 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
35402 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
35403 if (! pat)
35404 return 0;
35405 emit_insn (pat);
35406 return target;
35409 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
35411 static rtx
35412 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
35413 rtx target)
35415 rtx pat;
35416 tree arg0 = CALL_EXPR_ARG (exp, 0);
35417 tree arg1 = CALL_EXPR_ARG (exp, 1);
35418 rtx op0 = expand_normal (arg0);
35419 rtx op1 = expand_normal (arg1);
35420 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
35421 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
35422 enum rtx_code comparison = d->comparison;
35424 if (VECTOR_MODE_P (mode0))
35425 op0 = safe_vector_operand (op0, mode0);
35426 if (VECTOR_MODE_P (mode1))
35427 op1 = safe_vector_operand (op1, mode1);
35429 /* Swap operands if we have a comparison that isn't available in
35430 hardware. */
35431 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
35433 rtx tmp = op1;
35434 op1 = op0;
35435 op0 = tmp;
35438 target = gen_reg_rtx (SImode);
35439 emit_move_insn (target, const0_rtx);
35440 target = gen_rtx_SUBREG (QImode, target, 0);
35442 if ((optimize && !register_operand (op0, mode0))
35443 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35444 op0 = copy_to_mode_reg (mode0, op0);
35445 if ((optimize && !register_operand (op1, mode1))
35446 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
35447 op1 = copy_to_mode_reg (mode1, op1);
35449 pat = GEN_FCN (d->icode) (op0, op1);
35450 if (! pat)
35451 return 0;
35452 emit_insn (pat);
35453 emit_insn (gen_rtx_SET (VOIDmode,
35454 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35455 gen_rtx_fmt_ee (comparison, QImode,
35456 SET_DEST (pat),
35457 const0_rtx)));
35459 return SUBREG_REG (target);
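/* Illustrative only: a comi builtin handled here is
   __builtin_ia32_comisdeq, which <emmintrin.h> wraps roughly as

     extern __inline int
     _mm_comieq_sd (__m128d __A, __m128d __B)
     {
       return __builtin_ia32_comisdeq ((__v2df) __A, (__v2df) __B);
     }
*/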
35462 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
35464 static rtx
35465 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
35466 rtx target)
35468 rtx pat;
35469 tree arg0 = CALL_EXPR_ARG (exp, 0);
35470 rtx op1, op0 = expand_normal (arg0);
35471 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
35472 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35474 if (optimize || target == 0
35475 || GET_MODE (target) != tmode
35476 || !insn_data[d->icode].operand[0].predicate (target, tmode))
35477 target = gen_reg_rtx (tmode);
35479 if (VECTOR_MODE_P (mode0))
35480 op0 = safe_vector_operand (op0, mode0);
35482 if ((optimize && !register_operand (op0, mode0))
35483 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35484 op0 = copy_to_mode_reg (mode0, op0);
35486 op1 = GEN_INT (d->comparison);
35488 pat = GEN_FCN (d->icode) (target, op0, op1);
35489 if (! pat)
35490 return 0;
35491 emit_insn (pat);
35492 return target;
35495 static rtx
35496 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
35497 tree exp, rtx target)
35499 rtx pat;
35500 tree arg0 = CALL_EXPR_ARG (exp, 0);
35501 tree arg1 = CALL_EXPR_ARG (exp, 1);
35502 rtx op0 = expand_normal (arg0);
35503 rtx op1 = expand_normal (arg1);
35504 rtx op2;
35505 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
35506 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
35507 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
35509 if (optimize || target == 0
35510 || GET_MODE (target) != tmode
35511 || !insn_data[d->icode].operand[0].predicate (target, tmode))
35512 target = gen_reg_rtx (tmode);
35514 op0 = safe_vector_operand (op0, mode0);
35515 op1 = safe_vector_operand (op1, mode1);
35517 if ((optimize && !register_operand (op0, mode0))
35518 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35519 op0 = copy_to_mode_reg (mode0, op0);
35520 if ((optimize && !register_operand (op1, mode1))
35521 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
35522 op1 = copy_to_mode_reg (mode1, op1);
35524 op2 = GEN_INT (d->comparison);
35526 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
35527 if (! pat)
35528 return 0;
35529 emit_insn (pat);
35530 return target;
35533 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
35535 static rtx
35536 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
35537 rtx target)
35539 rtx pat;
35540 tree arg0 = CALL_EXPR_ARG (exp, 0);
35541 tree arg1 = CALL_EXPR_ARG (exp, 1);
35542 rtx op0 = expand_normal (arg0);
35543 rtx op1 = expand_normal (arg1);
35544 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
35545 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
35546 enum rtx_code comparison = d->comparison;
35548 if (VECTOR_MODE_P (mode0))
35549 op0 = safe_vector_operand (op0, mode0);
35550 if (VECTOR_MODE_P (mode1))
35551 op1 = safe_vector_operand (op1, mode1);
35553 target = gen_reg_rtx (SImode);
35554 emit_move_insn (target, const0_rtx);
35555 target = gen_rtx_SUBREG (QImode, target, 0);
35557 if ((optimize && !register_operand (op0, mode0))
35558 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
35559 op0 = copy_to_mode_reg (mode0, op0);
35560 if ((optimize && !register_operand (op1, mode1))
35561 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
35562 op1 = copy_to_mode_reg (mode1, op1);
35564 pat = GEN_FCN (d->icode) (op0, op1);
35565 if (! pat)
35566 return 0;
35567 emit_insn (pat);
35568 emit_insn (gen_rtx_SET (VOIDmode,
35569 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35570 gen_rtx_fmt_ee (comparison, QImode,
35571 SET_DEST (pat),
35572 const0_rtx)));
35574 return SUBREG_REG (target);
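/* Illustrative only: the ptest expanders back the SSE4.1 test
   intrinsics; <smmintrin.h> defines roughly

     extern __inline int
     _mm_testz_si128 (__m128i __M, __m128i __V)
     {
       return __builtin_ia32_ptestz128 ((__v2di) __M, (__v2di) __V);
     }
*/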
35577 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
35579 static rtx
35580 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
35581 tree exp, rtx target)
35583 rtx pat;
35584 tree arg0 = CALL_EXPR_ARG (exp, 0);
35585 tree arg1 = CALL_EXPR_ARG (exp, 1);
35586 tree arg2 = CALL_EXPR_ARG (exp, 2);
35587 tree arg3 = CALL_EXPR_ARG (exp, 3);
35588 tree arg4 = CALL_EXPR_ARG (exp, 4);
35589 rtx scratch0, scratch1;
35590 rtx op0 = expand_normal (arg0);
35591 rtx op1 = expand_normal (arg1);
35592 rtx op2 = expand_normal (arg2);
35593 rtx op3 = expand_normal (arg3);
35594 rtx op4 = expand_normal (arg4);
35595 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
35597 tmode0 = insn_data[d->icode].operand[0].mode;
35598 tmode1 = insn_data[d->icode].operand[1].mode;
35599 modev2 = insn_data[d->icode].operand[2].mode;
35600 modei3 = insn_data[d->icode].operand[3].mode;
35601 modev4 = insn_data[d->icode].operand[4].mode;
35602 modei5 = insn_data[d->icode].operand[5].mode;
35603 modeimm = insn_data[d->icode].operand[6].mode;
35605 if (VECTOR_MODE_P (modev2))
35606 op0 = safe_vector_operand (op0, modev2);
35607 if (VECTOR_MODE_P (modev4))
35608 op2 = safe_vector_operand (op2, modev4);
35610 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
35611 op0 = copy_to_mode_reg (modev2, op0);
35612 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
35613 op1 = copy_to_mode_reg (modei3, op1);
35614 if ((optimize && !register_operand (op2, modev4))
35615 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
35616 op2 = copy_to_mode_reg (modev4, op2);
35617 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
35618 op3 = copy_to_mode_reg (modei5, op3);
35620 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
35622 error ("the fifth argument must be an 8-bit immediate");
35623 return const0_rtx;
35626 if (d->code == IX86_BUILTIN_PCMPESTRI128)
35628 if (optimize || !target
35629 || GET_MODE (target) != tmode0
35630 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
35631 target = gen_reg_rtx (tmode0);
35633 scratch1 = gen_reg_rtx (tmode1);
35635 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
35637 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
35639 if (optimize || !target
35640 || GET_MODE (target) != tmode1
35641 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
35642 target = gen_reg_rtx (tmode1);
35644 scratch0 = gen_reg_rtx (tmode0);
35646 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
35648 else
35650 gcc_assert (d->flag);
35652 scratch0 = gen_reg_rtx (tmode0);
35653 scratch1 = gen_reg_rtx (tmode1);
35655 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
35658 if (! pat)
35659 return 0;
35661 emit_insn (pat);
35663 if (d->flag)
35665 target = gen_reg_rtx (SImode);
35666 emit_move_insn (target, const0_rtx);
35667 target = gen_rtx_SUBREG (QImode, target, 0);
35669 emit_insn
35670 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35671 gen_rtx_fmt_ee (EQ, QImode,
35672 gen_rtx_REG ((enum machine_mode) d->flag,
35673 FLAGS_REG),
35674 const0_rtx)));
35675 return SUBREG_REG (target);
35677 else
35678 return target;
35682 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
35684 static rtx
35685 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
35686 tree exp, rtx target)
35688 rtx pat;
35689 tree arg0 = CALL_EXPR_ARG (exp, 0);
35690 tree arg1 = CALL_EXPR_ARG (exp, 1);
35691 tree arg2 = CALL_EXPR_ARG (exp, 2);
35692 rtx scratch0, scratch1;
35693 rtx op0 = expand_normal (arg0);
35694 rtx op1 = expand_normal (arg1);
35695 rtx op2 = expand_normal (arg2);
35696 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
35698 tmode0 = insn_data[d->icode].operand[0].mode;
35699 tmode1 = insn_data[d->icode].operand[1].mode;
35700 modev2 = insn_data[d->icode].operand[2].mode;
35701 modev3 = insn_data[d->icode].operand[3].mode;
35702 modeimm = insn_data[d->icode].operand[4].mode;
35704 if (VECTOR_MODE_P (modev2))
35705 op0 = safe_vector_operand (op0, modev2);
35706 if (VECTOR_MODE_P (modev3))
35707 op1 = safe_vector_operand (op1, modev3);
35709 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
35710 op0 = copy_to_mode_reg (modev2, op0);
35711 if ((optimize && !register_operand (op1, modev3))
35712 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
35713 op1 = copy_to_mode_reg (modev3, op1);
35715 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
35717 error ("the third argument must be an 8-bit immediate");
35718 return const0_rtx;
35721 if (d->code == IX86_BUILTIN_PCMPISTRI128)
35723 if (optimize || !target
35724 || GET_MODE (target) != tmode0
35725 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
35726 target = gen_reg_rtx (tmode0);
35728 scratch1 = gen_reg_rtx (tmode1);
35730 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
35732 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
35734 if (optimize || !target
35735 || GET_MODE (target) != tmode1
35736 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
35737 target = gen_reg_rtx (tmode1);
35739 scratch0 = gen_reg_rtx (tmode0);
35741 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
35743 else
35745 gcc_assert (d->flag);
35747 scratch0 = gen_reg_rtx (tmode0);
35748 scratch1 = gen_reg_rtx (tmode1);
35750 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
35753 if (! pat)
35754 return 0;
35756 emit_insn (pat);
35758 if (d->flag)
35760 target = gen_reg_rtx (SImode);
35761 emit_move_insn (target, const0_rtx);
35762 target = gen_rtx_SUBREG (QImode, target, 0);
35764 emit_insn
35765 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
35766 gen_rtx_fmt_ee (EQ, QImode,
35767 gen_rtx_REG ((enum machine_mode) d->flag,
35768 FLAGS_REG),
35769 const0_rtx)));
35770 return SUBREG_REG (target);
35772 else
35773 return target;
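/* Illustrative only: at the user level the pcmpestr/pcmpistr expanders
   back the SSE4.2 string intrinsics, e.g. with <nmmintrin.h>

     int idx = _mm_cmpistri (needle, haystack,
                             _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);

   where the mode operand must be a compile-time 8-bit immediate,
   matching the checks above.  */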
35776 /* Subroutine of ix86_expand_builtin to take care of insns with
35777 variable number of operands. */
35779 static rtx
35780 ix86_expand_args_builtin (const struct builtin_description *d,
35781 tree exp, rtx target)
35783 rtx pat, real_target;
35784 unsigned int i, nargs;
35785 unsigned int nargs_constant = 0;
35786 unsigned int mask_pos = 0;
35787 int num_memory = 0;
35788 struct
35790 rtx op;
35791 enum machine_mode mode;
35792 } args[6];
35793 bool last_arg_count = false;
35794 enum insn_code icode = d->icode;
35795 const struct insn_data_d *insn_p = &insn_data[icode];
35796 enum machine_mode tmode = insn_p->operand[0].mode;
35797 enum machine_mode rmode = VOIDmode;
35798 bool swap = false;
35799 enum rtx_code comparison = d->comparison;
35801 switch ((enum ix86_builtin_func_type) d->flag)
35803 case V2DF_FTYPE_V2DF_ROUND:
35804 case V4DF_FTYPE_V4DF_ROUND:
35805 case V4SF_FTYPE_V4SF_ROUND:
35806 case V8SF_FTYPE_V8SF_ROUND:
35807 case V4SI_FTYPE_V4SF_ROUND:
35808 case V8SI_FTYPE_V8SF_ROUND:
35809 return ix86_expand_sse_round (d, exp, target);
35810 case V4SI_FTYPE_V2DF_V2DF_ROUND:
35811 case V8SI_FTYPE_V4DF_V4DF_ROUND:
35812 case V16SI_FTYPE_V8DF_V8DF_ROUND:
35813 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
35814 case INT_FTYPE_V8SF_V8SF_PTEST:
35815 case INT_FTYPE_V4DI_V4DI_PTEST:
35816 case INT_FTYPE_V4DF_V4DF_PTEST:
35817 case INT_FTYPE_V4SF_V4SF_PTEST:
35818 case INT_FTYPE_V2DI_V2DI_PTEST:
35819 case INT_FTYPE_V2DF_V2DF_PTEST:
35820 return ix86_expand_sse_ptest (d, exp, target);
35821 case FLOAT128_FTYPE_FLOAT128:
35822 case FLOAT_FTYPE_FLOAT:
35823 case INT_FTYPE_INT:
35824 case UINT64_FTYPE_INT:
35825 case UINT16_FTYPE_UINT16:
35826 case INT64_FTYPE_INT64:
35827 case INT64_FTYPE_V4SF:
35828 case INT64_FTYPE_V2DF:
35829 case INT_FTYPE_V16QI:
35830 case INT_FTYPE_V8QI:
35831 case INT_FTYPE_V8SF:
35832 case INT_FTYPE_V4DF:
35833 case INT_FTYPE_V4SF:
35834 case INT_FTYPE_V2DF:
35835 case INT_FTYPE_V32QI:
35836 case V16QI_FTYPE_V16QI:
35837 case V8SI_FTYPE_V8SF:
35838 case V8SI_FTYPE_V4SI:
35839 case V8HI_FTYPE_V8HI:
35840 case V8HI_FTYPE_V16QI:
35841 case V8QI_FTYPE_V8QI:
35842 case V8SF_FTYPE_V8SF:
35843 case V8SF_FTYPE_V8SI:
35844 case V8SF_FTYPE_V4SF:
35845 case V8SF_FTYPE_V8HI:
35846 case V4SI_FTYPE_V4SI:
35847 case V4SI_FTYPE_V16QI:
35848 case V4SI_FTYPE_V4SF:
35849 case V4SI_FTYPE_V8SI:
35850 case V4SI_FTYPE_V8HI:
35851 case V4SI_FTYPE_V4DF:
35852 case V4SI_FTYPE_V2DF:
35853 case V4HI_FTYPE_V4HI:
35854 case V4DF_FTYPE_V4DF:
35855 case V4DF_FTYPE_V4SI:
35856 case V4DF_FTYPE_V4SF:
35857 case V4DF_FTYPE_V2DF:
35858 case V4SF_FTYPE_V4SF:
35859 case V4SF_FTYPE_V4SI:
35860 case V4SF_FTYPE_V8SF:
35861 case V4SF_FTYPE_V4DF:
35862 case V4SF_FTYPE_V8HI:
35863 case V4SF_FTYPE_V2DF:
35864 case V2DI_FTYPE_V2DI:
35865 case V2DI_FTYPE_V16QI:
35866 case V2DI_FTYPE_V8HI:
35867 case V2DI_FTYPE_V4SI:
35868 case V2DF_FTYPE_V2DF:
35869 case V2DF_FTYPE_V4SI:
35870 case V2DF_FTYPE_V4DF:
35871 case V2DF_FTYPE_V4SF:
35872 case V2DF_FTYPE_V2SI:
35873 case V2SI_FTYPE_V2SI:
35874 case V2SI_FTYPE_V4SF:
35875 case V2SI_FTYPE_V2SF:
35876 case V2SI_FTYPE_V2DF:
35877 case V2SF_FTYPE_V2SF:
35878 case V2SF_FTYPE_V2SI:
35879 case V32QI_FTYPE_V32QI:
35880 case V32QI_FTYPE_V16QI:
35881 case V16HI_FTYPE_V16HI:
35882 case V16HI_FTYPE_V8HI:
35883 case V8SI_FTYPE_V8SI:
35884 case V16HI_FTYPE_V16QI:
35885 case V8SI_FTYPE_V16QI:
35886 case V4DI_FTYPE_V16QI:
35887 case V8SI_FTYPE_V8HI:
35888 case V4DI_FTYPE_V8HI:
35889 case V4DI_FTYPE_V4SI:
35890 case V4DI_FTYPE_V2DI:
35891 case HI_FTYPE_HI:
35892 case HI_FTYPE_V16QI:
35893 case SI_FTYPE_V32QI:
35894 case DI_FTYPE_V64QI:
35895 case V16QI_FTYPE_HI:
35896 case V32QI_FTYPE_SI:
35897 case V64QI_FTYPE_DI:
35898 case V8HI_FTYPE_QI:
35899 case V16HI_FTYPE_HI:
35900 case V32HI_FTYPE_SI:
35901 case V4SI_FTYPE_QI:
35902 case V8SI_FTYPE_QI:
35903 case V4SI_FTYPE_HI:
35904 case V8SI_FTYPE_HI:
35905 case QI_FTYPE_V8HI:
35906 case HI_FTYPE_V16HI:
35907 case SI_FTYPE_V32HI:
35908 case QI_FTYPE_V4SI:
35909 case QI_FTYPE_V8SI:
35910 case HI_FTYPE_V16SI:
35911 case QI_FTYPE_V2DI:
35912 case QI_FTYPE_V4DI:
35913 case QI_FTYPE_V8DI:
35914 case UINT_FTYPE_V2DF:
35915 case UINT_FTYPE_V4SF:
35916 case UINT64_FTYPE_V2DF:
35917 case UINT64_FTYPE_V4SF:
35918 case V16QI_FTYPE_V8DI:
35919 case V16HI_FTYPE_V16SI:
35920 case V16SI_FTYPE_HI:
35921 case V2DI_FTYPE_QI:
35922 case V4DI_FTYPE_QI:
35923 case V16SI_FTYPE_V16SI:
35924 case V16SI_FTYPE_INT:
35925 case V16SF_FTYPE_FLOAT:
35926 case V16SF_FTYPE_V4SF:
35927 case V16SF_FTYPE_V16SF:
35928 case V8HI_FTYPE_V8DI:
35929 case V8UHI_FTYPE_V8UHI:
35930 case V8SI_FTYPE_V8DI:
35931 case V8SF_FTYPE_V8DF:
35932 case V8DI_FTYPE_QI:
35933 case V8DI_FTYPE_INT64:
35934 case V8DI_FTYPE_V4DI:
35935 case V8DI_FTYPE_V8DI:
35936 case V8DF_FTYPE_DOUBLE:
35937 case V8DF_FTYPE_V4DF:
35938 case V8DF_FTYPE_V8DF:
35939 case V8DF_FTYPE_V8SI:
35940 nargs = 1;
35941 break;
35942 case V4SF_FTYPE_V4SF_VEC_MERGE:
35943 case V2DF_FTYPE_V2DF_VEC_MERGE:
35944 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
35945 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
35946 case V16QI_FTYPE_V16QI_V16QI:
35947 case V16QI_FTYPE_V8HI_V8HI:
35948 case V16SI_FTYPE_V16SI_V16SI:
35949 case V16SF_FTYPE_V16SF_V16SF:
35950 case V16SF_FTYPE_V16SF_V16SI:
35951 case V8QI_FTYPE_V8QI_V8QI:
35952 case V8QI_FTYPE_V4HI_V4HI:
35953 case V8HI_FTYPE_V8HI_V8HI:
35954 case V8HI_FTYPE_V16QI_V16QI:
35955 case V8HI_FTYPE_V4SI_V4SI:
35956 case V8SF_FTYPE_V8SF_V8SF:
35957 case V8SF_FTYPE_V8SF_V8SI:
35958 case V8DI_FTYPE_V8DI_V8DI:
35959 case V8DF_FTYPE_V8DF_V8DF:
35960 case V8DF_FTYPE_V8DF_V8DI:
35961 case V4SI_FTYPE_V4SI_V4SI:
35962 case V4SI_FTYPE_V8HI_V8HI:
35963 case V4SI_FTYPE_V4SF_V4SF:
35964 case V4SI_FTYPE_V2DF_V2DF:
35965 case V4HI_FTYPE_V4HI_V4HI:
35966 case V4HI_FTYPE_V8QI_V8QI:
35967 case V4HI_FTYPE_V2SI_V2SI:
35968 case V4DF_FTYPE_V4DF_V4DF:
35969 case V4DF_FTYPE_V4DF_V4DI:
35970 case V4SF_FTYPE_V4SF_V4SF:
35971 case V4SF_FTYPE_V4SF_V4SI:
35972 case V4SF_FTYPE_V4SF_V2SI:
35973 case V4SF_FTYPE_V4SF_V2DF:
35974 case V4SF_FTYPE_V4SF_UINT:
35975 case V4SF_FTYPE_V4SF_UINT64:
35976 case V4SF_FTYPE_V4SF_DI:
35977 case V4SF_FTYPE_V4SF_SI:
35978 case V2DI_FTYPE_V2DI_V2DI:
35979 case V2DI_FTYPE_V16QI_V16QI:
35980 case V2DI_FTYPE_V4SI_V4SI:
35981 case V2UDI_FTYPE_V4USI_V4USI:
35982 case V2DI_FTYPE_V2DI_V16QI:
35983 case V2DI_FTYPE_V2DF_V2DF:
35984 case V2SI_FTYPE_V2SI_V2SI:
35985 case V2SI_FTYPE_V4HI_V4HI:
35986 case V2SI_FTYPE_V2SF_V2SF:
35987 case V2DF_FTYPE_V2DF_V2DF:
35988 case V2DF_FTYPE_V2DF_V4SF:
35989 case V2DF_FTYPE_V2DF_V2DI:
35990 case V2DF_FTYPE_V2DF_DI:
35991 case V2DF_FTYPE_V2DF_SI:
35992 case V2DF_FTYPE_V2DF_UINT:
35993 case V2DF_FTYPE_V2DF_UINT64:
35994 case V2SF_FTYPE_V2SF_V2SF:
35995 case V1DI_FTYPE_V1DI_V1DI:
35996 case V1DI_FTYPE_V8QI_V8QI:
35997 case V1DI_FTYPE_V2SI_V2SI:
35998 case V32QI_FTYPE_V16HI_V16HI:
35999 case V16HI_FTYPE_V8SI_V8SI:
36000 case V32QI_FTYPE_V32QI_V32QI:
36001 case V16HI_FTYPE_V32QI_V32QI:
36002 case V16HI_FTYPE_V16HI_V16HI:
36003 case V8SI_FTYPE_V4DF_V4DF:
36004 case V8SI_FTYPE_V8SI_V8SI:
36005 case V8SI_FTYPE_V16HI_V16HI:
36006 case V4DI_FTYPE_V4DI_V4DI:
36007 case V4DI_FTYPE_V8SI_V8SI:
36008 case V4UDI_FTYPE_V8USI_V8USI:
36009 case QI_FTYPE_V8DI_V8DI:
36010 case V8DI_FTYPE_V64QI_V64QI:
36011 case HI_FTYPE_V16SI_V16SI:
36012 if (comparison == UNKNOWN)
36013 return ix86_expand_binop_builtin (icode, exp, target);
36014 nargs = 2;
36015 break;
36016 case V4SF_FTYPE_V4SF_V4SF_SWAP:
36017 case V2DF_FTYPE_V2DF_V2DF_SWAP:
36018 gcc_assert (comparison != UNKNOWN);
36019 nargs = 2;
36020 swap = true;
36021 break;
36022 case V16HI_FTYPE_V16HI_V8HI_COUNT:
36023 case V16HI_FTYPE_V16HI_SI_COUNT:
36024 case V8SI_FTYPE_V8SI_V4SI_COUNT:
36025 case V8SI_FTYPE_V8SI_SI_COUNT:
36026 case V4DI_FTYPE_V4DI_V2DI_COUNT:
36027 case V4DI_FTYPE_V4DI_INT_COUNT:
36028 case V8HI_FTYPE_V8HI_V8HI_COUNT:
36029 case V8HI_FTYPE_V8HI_SI_COUNT:
36030 case V4SI_FTYPE_V4SI_V4SI_COUNT:
36031 case V4SI_FTYPE_V4SI_SI_COUNT:
36032 case V4HI_FTYPE_V4HI_V4HI_COUNT:
36033 case V4HI_FTYPE_V4HI_SI_COUNT:
36034 case V2DI_FTYPE_V2DI_V2DI_COUNT:
36035 case V2DI_FTYPE_V2DI_SI_COUNT:
36036 case V2SI_FTYPE_V2SI_V2SI_COUNT:
36037 case V2SI_FTYPE_V2SI_SI_COUNT:
36038 case V1DI_FTYPE_V1DI_V1DI_COUNT:
36039 case V1DI_FTYPE_V1DI_SI_COUNT:
36040 nargs = 2;
36041 last_arg_count = true;
36042 break;
36043 case UINT64_FTYPE_UINT64_UINT64:
36044 case UINT_FTYPE_UINT_UINT:
36045 case UINT_FTYPE_UINT_USHORT:
36046 case UINT_FTYPE_UINT_UCHAR:
36047 case UINT16_FTYPE_UINT16_INT:
36048 case UINT8_FTYPE_UINT8_INT:
36049 case HI_FTYPE_HI_HI:
36050 case SI_FTYPE_SI_SI:
36051 case DI_FTYPE_DI_DI:
36052 case V16SI_FTYPE_V8DF_V8DF:
36053 nargs = 2;
36054 break;
36055 case V2DI_FTYPE_V2DI_INT_CONVERT:
36056 nargs = 2;
36057 rmode = V1TImode;
36058 nargs_constant = 1;
36059 break;
36060 case V4DI_FTYPE_V4DI_INT_CONVERT:
36061 nargs = 2;
36062 rmode = V2TImode;
36063 nargs_constant = 1;
36064 break;
36065 case V8DI_FTYPE_V8DI_INT_CONVERT:
36066 nargs = 2;
36067 rmode = V4TImode;
36068 nargs_constant = 1;
36069 break;
36070 case V8HI_FTYPE_V8HI_INT:
36071 case V8HI_FTYPE_V8SF_INT:
36072 case V16HI_FTYPE_V16SF_INT:
36073 case V8HI_FTYPE_V4SF_INT:
36074 case V8SF_FTYPE_V8SF_INT:
36075 case V4SF_FTYPE_V16SF_INT:
36076 case V16SF_FTYPE_V16SF_INT:
36077 case V4SI_FTYPE_V4SI_INT:
36078 case V4SI_FTYPE_V8SI_INT:
36079 case V4HI_FTYPE_V4HI_INT:
36080 case V4DF_FTYPE_V4DF_INT:
36081 case V4DF_FTYPE_V8DF_INT:
36082 case V4SF_FTYPE_V4SF_INT:
36083 case V4SF_FTYPE_V8SF_INT:
36084 case V2DI_FTYPE_V2DI_INT:
36085 case V2DF_FTYPE_V2DF_INT:
36086 case V2DF_FTYPE_V4DF_INT:
36087 case V16HI_FTYPE_V16HI_INT:
36088 case V8SI_FTYPE_V8SI_INT:
36089 case V16SI_FTYPE_V16SI_INT:
36090 case V4SI_FTYPE_V16SI_INT:
36091 case V4DI_FTYPE_V4DI_INT:
36092 case V2DI_FTYPE_V4DI_INT:
36093 case V4DI_FTYPE_V8DI_INT:
36094 case HI_FTYPE_HI_INT:
36095 case QI_FTYPE_V4SF_INT:
36096 case QI_FTYPE_V2DF_INT:
36097 nargs = 2;
36098 nargs_constant = 1;
36099 break;
36100 case V16QI_FTYPE_V16QI_V16QI_V16QI:
36101 case V8SF_FTYPE_V8SF_V8SF_V8SF:
36102 case V4DF_FTYPE_V4DF_V4DF_V4DF:
36103 case V4SF_FTYPE_V4SF_V4SF_V4SF:
36104 case V2DF_FTYPE_V2DF_V2DF_V2DF:
36105 case V32QI_FTYPE_V32QI_V32QI_V32QI:
36106 case HI_FTYPE_V16SI_V16SI_HI:
36107 case QI_FTYPE_V8DI_V8DI_QI:
36108 case V16HI_FTYPE_V16SI_V16HI_HI:
36109 case V16QI_FTYPE_V16SI_V16QI_HI:
36110 case V16QI_FTYPE_V8DI_V16QI_QI:
36111 case V16SF_FTYPE_V16SF_V16SF_HI:
36112 case V16SF_FTYPE_V16SF_V16SF_V16SF:
36113 case V16SF_FTYPE_V16SF_V16SI_V16SF:
36114 case V16SF_FTYPE_V16SI_V16SF_HI:
36115 case V16SF_FTYPE_V16SI_V16SF_V16SF:
36116 case V16SF_FTYPE_V4SF_V16SF_HI:
36117 case V16SI_FTYPE_SI_V16SI_HI:
36118 case V16SI_FTYPE_V16HI_V16SI_HI:
36119 case V16SI_FTYPE_V16QI_V16SI_HI:
36120 case V16SI_FTYPE_V16SF_V16SI_HI:
36121 case V8SF_FTYPE_V4SF_V8SF_QI:
36122 case V4DF_FTYPE_V2DF_V4DF_QI:
36123 case V8SI_FTYPE_V4SI_V8SI_QI:
36124 case V8SI_FTYPE_SI_V8SI_QI:
36125 case V4SI_FTYPE_V4SI_V4SI_QI:
36126 case V4SI_FTYPE_SI_V4SI_QI:
36127 case V4DI_FTYPE_V2DI_V4DI_QI:
36128 case V4DI_FTYPE_DI_V4DI_QI:
36129 case V2DI_FTYPE_V2DI_V2DI_QI:
36130 case V2DI_FTYPE_DI_V2DI_QI:
36131 case V64QI_FTYPE_V64QI_V64QI_DI:
36132 case V64QI_FTYPE_V16QI_V64QI_DI:
36133 case V64QI_FTYPE_QI_V64QI_DI:
36134 case V32QI_FTYPE_V32QI_V32QI_SI:
36135 case V32QI_FTYPE_V16QI_V32QI_SI:
36136 case V32QI_FTYPE_QI_V32QI_SI:
36137 case V16QI_FTYPE_V16QI_V16QI_HI:
36138 case V16QI_FTYPE_QI_V16QI_HI:
36139 case V32HI_FTYPE_V8HI_V32HI_SI:
36140 case V32HI_FTYPE_HI_V32HI_SI:
36141 case V16HI_FTYPE_V8HI_V16HI_HI:
36142 case V16HI_FTYPE_HI_V16HI_HI:
36143 case V8HI_FTYPE_V8HI_V8HI_QI:
36144 case V8HI_FTYPE_HI_V8HI_QI:
36145 case V8SF_FTYPE_V8HI_V8SF_QI:
36146 case V4SF_FTYPE_V8HI_V4SF_QI:
36147 case V8SI_FTYPE_V8SF_V8SI_QI:
36148 case V4SI_FTYPE_V4SF_V4SI_QI:
36149 case V8DI_FTYPE_V8SF_V8DI_QI:
36150 case V4DI_FTYPE_V4SF_V4DI_QI:
36151 case V2DI_FTYPE_V4SF_V2DI_QI:
36152 case V8SF_FTYPE_V8DI_V8SF_QI:
36153 case V4SF_FTYPE_V4DI_V4SF_QI:
36154 case V4SF_FTYPE_V2DI_V4SF_QI:
36155 case V8DF_FTYPE_V8DI_V8DF_QI:
36156 case V4DF_FTYPE_V4DI_V4DF_QI:
36157 case V2DF_FTYPE_V2DI_V2DF_QI:
36158 case V16QI_FTYPE_V8HI_V16QI_QI:
36159 case V16QI_FTYPE_V16HI_V16QI_HI:
36160 case V16QI_FTYPE_V4SI_V16QI_QI:
36161 case V16QI_FTYPE_V8SI_V16QI_QI:
36162 case V8HI_FTYPE_V4SI_V8HI_QI:
36163 case V8HI_FTYPE_V8SI_V8HI_QI:
36164 case V16QI_FTYPE_V2DI_V16QI_QI:
36165 case V16QI_FTYPE_V4DI_V16QI_QI:
36166 case V8HI_FTYPE_V2DI_V8HI_QI:
36167 case V8HI_FTYPE_V4DI_V8HI_QI:
36168 case V4SI_FTYPE_V2DI_V4SI_QI:
36169 case V4SI_FTYPE_V4DI_V4SI_QI:
36170 case V32QI_FTYPE_V32HI_V32QI_SI:
36171 case HI_FTYPE_V16QI_V16QI_HI:
36172 case SI_FTYPE_V32QI_V32QI_SI:
36173 case DI_FTYPE_V64QI_V64QI_DI:
36174 case QI_FTYPE_V8HI_V8HI_QI:
36175 case HI_FTYPE_V16HI_V16HI_HI:
36176 case SI_FTYPE_V32HI_V32HI_SI:
36177 case QI_FTYPE_V4SI_V4SI_QI:
36178 case QI_FTYPE_V8SI_V8SI_QI:
36179 case QI_FTYPE_V2DI_V2DI_QI:
36180 case QI_FTYPE_V4DI_V4DI_QI:
36181 case V4SF_FTYPE_V2DF_V4SF_QI:
36182 case V4SF_FTYPE_V4DF_V4SF_QI:
36183 nargs = 3;
36184 case V16SI_FTYPE_V16SI_V16SI_HI:
36185 case V16SI_FTYPE_V16SI_V16SI_V16SI:
36186 case V16SI_FTYPE_V4SI_V16SI_HI:
36187 case V2DI_FTYPE_V2DI_V2DI_V2DI:
36188 case V2DI_FTYPE_V4SI_V2DI_QI:
36189 case V2DI_FTYPE_V8HI_V2DI_QI:
36190 case V2DI_FTYPE_V16QI_V2DI_QI:
36191 case V4DI_FTYPE_V4DI_V4DI_QI:
36192 case V4DI_FTYPE_V4SI_V4DI_QI:
36193 case V4DI_FTYPE_V8HI_V4DI_QI:
36194 case V4DI_FTYPE_V16QI_V4DI_QI:
36195 case V8DI_FTYPE_V8DF_V8DI_QI:
36196 case V4DI_FTYPE_V4DF_V4DI_QI:
36197 case V2DI_FTYPE_V2DF_V2DI_QI:
36198 case V4SI_FTYPE_V4DF_V4SI_QI:
36199 case V4SI_FTYPE_V2DF_V4SI_QI:
36200 case V4SI_FTYPE_V8HI_V4SI_QI:
36201 case V4SI_FTYPE_V16QI_V4SI_QI:
36202 case V8SI_FTYPE_V8SI_V8SI_V8SI:
36203 case V4DI_FTYPE_V4DI_V4DI_V4DI:
36204 case V8DF_FTYPE_V2DF_V8DF_QI:
36205 case V8DF_FTYPE_V4DF_V8DF_QI:
36206 case V8DF_FTYPE_V8DF_V8DF_QI:
36207 case V8DF_FTYPE_V8DF_V8DF_V8DF:
36208 case V8SF_FTYPE_V8SF_V8SF_QI:
36209 case V8SF_FTYPE_V8SI_V8SF_QI:
36210 case V4DF_FTYPE_V4DF_V4DF_QI:
36211 case V4SF_FTYPE_V4SF_V4SF_QI:
36212 case V2DF_FTYPE_V2DF_V2DF_QI:
36213 case V2DF_FTYPE_V4SF_V2DF_QI:
36214 case V2DF_FTYPE_V4SI_V2DF_QI:
36215 case V4SF_FTYPE_V4SI_V4SF_QI:
36216 case V4DF_FTYPE_V4SF_V4DF_QI:
36217 case V4DF_FTYPE_V4SI_V4DF_QI:
36218 case V8SI_FTYPE_V8SI_V8SI_QI:
36219 case V8SI_FTYPE_V8HI_V8SI_QI:
36220 case V8SI_FTYPE_V16QI_V8SI_QI:
36221 case V8DF_FTYPE_V8DF_V8DI_V8DF:
36222 case V8DF_FTYPE_V8DI_V8DF_V8DF:
36223 case V8DF_FTYPE_V8SF_V8DF_QI:
36224 case V8DF_FTYPE_V8SI_V8DF_QI:
36225 case V8DI_FTYPE_DI_V8DI_QI:
36226 case V16SF_FTYPE_V8SF_V16SF_HI:
36227 case V16SI_FTYPE_V8SI_V16SI_HI:
36228 case V16HI_FTYPE_V16HI_V16HI_HI:
36229 case V8HI_FTYPE_V16QI_V8HI_QI:
36230 case V16HI_FTYPE_V16QI_V16HI_HI:
36231 case V32HI_FTYPE_V32HI_V32HI_SI:
36232 case V32HI_FTYPE_V32QI_V32HI_SI:
36233 case V8DI_FTYPE_V16QI_V8DI_QI:
36234 case V8DI_FTYPE_V2DI_V8DI_QI:
36235 case V8DI_FTYPE_V4DI_V8DI_QI:
36236 case V8DI_FTYPE_V8DI_V8DI_QI:
36237 case V8DI_FTYPE_V8DI_V8DI_V8DI:
36238 case V8DI_FTYPE_V8HI_V8DI_QI:
36239 case V8DI_FTYPE_V8SI_V8DI_QI:
36240 case V8HI_FTYPE_V8DI_V8HI_QI:
36241 case V8SF_FTYPE_V8DF_V8SF_QI:
36242 case V8SI_FTYPE_V8DF_V8SI_QI:
36243 case V8SI_FTYPE_V8DI_V8SI_QI:
36244 case V4SI_FTYPE_V4SI_V4SI_V4SI:
36245 nargs = 3;
36246 break;
36247 case V32QI_FTYPE_V32QI_V32QI_INT:
36248 case V16HI_FTYPE_V16HI_V16HI_INT:
36249 case V16QI_FTYPE_V16QI_V16QI_INT:
36250 case V4DI_FTYPE_V4DI_V4DI_INT:
36251 case V8HI_FTYPE_V8HI_V8HI_INT:
36252 case V8SI_FTYPE_V8SI_V8SI_INT:
36253 case V8SI_FTYPE_V8SI_V4SI_INT:
36254 case V8SF_FTYPE_V8SF_V8SF_INT:
36255 case V8SF_FTYPE_V8SF_V4SF_INT:
36256 case V4SI_FTYPE_V4SI_V4SI_INT:
36257 case V4DF_FTYPE_V4DF_V4DF_INT:
36258 case V16SF_FTYPE_V16SF_V16SF_INT:
36259 case V16SF_FTYPE_V16SF_V4SF_INT:
36260 case V16SI_FTYPE_V16SI_V4SI_INT:
36261 case V4DF_FTYPE_V4DF_V2DF_INT:
36262 case V4SF_FTYPE_V4SF_V4SF_INT:
36263 case V2DI_FTYPE_V2DI_V2DI_INT:
36264 case V4DI_FTYPE_V4DI_V2DI_INT:
36265 case V2DF_FTYPE_V2DF_V2DF_INT:
36266 case QI_FTYPE_V8DI_V8DI_INT:
36267 case QI_FTYPE_V8DF_V8DF_INT:
36268 case QI_FTYPE_V2DF_V2DF_INT:
36269 case QI_FTYPE_V4SF_V4SF_INT:
36270 case HI_FTYPE_V16SI_V16SI_INT:
36271 case HI_FTYPE_V16SF_V16SF_INT:
36272 nargs = 3;
36273 nargs_constant = 1;
36274 break;
36275 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
36276 nargs = 3;
36277 rmode = V4DImode;
36278 nargs_constant = 1;
36279 break;
36280 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
36281 nargs = 3;
36282 rmode = V2DImode;
36283 nargs_constant = 1;
36284 break;
36285 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
36286 nargs = 3;
36287 rmode = DImode;
36288 nargs_constant = 1;
36289 break;
36290 case V2DI_FTYPE_V2DI_UINT_UINT:
36291 nargs = 3;
36292 nargs_constant = 2;
36293 break;
36294 case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
36295 nargs = 3;
36296 rmode = V8DImode;
36297 nargs_constant = 1;
36298 break;
36299 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
36300 nargs = 5;
36301 rmode = V8DImode;
36302 mask_pos = 2;
36303 nargs_constant = 1;
36304 break;
36305 case QI_FTYPE_V8DF_INT_QI:
36306 case QI_FTYPE_V4DF_INT_QI:
36307 case QI_FTYPE_V2DF_INT_QI:
36308 case HI_FTYPE_V16SF_INT_HI:
36309 case QI_FTYPE_V8SF_INT_QI:
36310 case QI_FTYPE_V4SF_INT_QI:
36311 nargs = 3;
36312 mask_pos = 1;
36313 nargs_constant = 1;
36314 break;
36315 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
36316 nargs = 5;
36317 rmode = V4DImode;
36318 mask_pos = 2;
36319 nargs_constant = 1;
36320 break;
36321 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
36322 nargs = 5;
36323 rmode = V2DImode;
36324 mask_pos = 2;
36325 nargs_constant = 1;
36326 break;
36327 case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
36328 case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
36329 case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
36330 case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
36331 case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
36332 case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
36333 case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
36334 case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
36335 case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
36336 case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
36337 case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
36338 case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
36339 case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
36340 case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
36341 case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
36342 case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
36343 case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
36344 case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
36345 case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
36346 case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
36347 case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
36348 case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
36349 case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
36350 case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
36351 case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
36352 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
36353 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
36354 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
36355 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
36356 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
36357 case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
36358 case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
36359 case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
36360 case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
36361 case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
36362 case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
36363 case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
36364 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
36365 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
36366 case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
36367 case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
36368 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
36369 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
36370 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
36371 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
36372 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
36373 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
36374 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
36375 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
36376 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
36377 case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
36378 case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
36379 case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
36380 case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
36381 nargs = 4;
36382 break;
36383 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
36384 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
36385 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
36386 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
36387 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
36388 nargs = 4;
36389 nargs_constant = 1;
36390 break;
36391 case QI_FTYPE_V4DI_V4DI_INT_QI:
36392 case QI_FTYPE_V8SI_V8SI_INT_QI:
36393 case QI_FTYPE_V4DF_V4DF_INT_QI:
36394 case QI_FTYPE_V8SF_V8SF_INT_QI:
36395 case QI_FTYPE_V2DI_V2DI_INT_QI:
36396 case QI_FTYPE_V4SI_V4SI_INT_QI:
36397 case QI_FTYPE_V2DF_V2DF_INT_QI:
36398 case QI_FTYPE_V4SF_V4SF_INT_QI:
36399 case DI_FTYPE_V64QI_V64QI_INT_DI:
36400 case SI_FTYPE_V32QI_V32QI_INT_SI:
36401 case HI_FTYPE_V16QI_V16QI_INT_HI:
36402 case SI_FTYPE_V32HI_V32HI_INT_SI:
36403 case HI_FTYPE_V16HI_V16HI_INT_HI:
36404 case QI_FTYPE_V8HI_V8HI_INT_QI:
36405 nargs = 4;
36406 mask_pos = 1;
36407 nargs_constant = 1;
36408 break;
36409 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
36410 nargs = 4;
36411 nargs_constant = 2;
36412 break;
36413 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
36414 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
36415 nargs = 4;
36416 break;
36417 case QI_FTYPE_V8DI_V8DI_INT_QI:
36418 case HI_FTYPE_V16SI_V16SI_INT_HI:
36419 case QI_FTYPE_V8DF_V8DF_INT_QI:
36420 case HI_FTYPE_V16SF_V16SF_INT_HI:
36421 mask_pos = 1;
36422 nargs = 4;
36423 nargs_constant = 1;
36424 break;
36425 case V8SF_FTYPE_V8SF_INT_V8SF_QI:
36426 case V4SF_FTYPE_V4SF_INT_V4SF_QI:
36427 case V2DF_FTYPE_V4DF_INT_V2DF_QI:
36428 case V2DI_FTYPE_V4DI_INT_V2DI_QI:
36429 case V8SF_FTYPE_V16SF_INT_V8SF_QI:
36430 case V8SI_FTYPE_V16SI_INT_V8SI_QI:
36431 case V2DF_FTYPE_V8DF_INT_V2DF_QI:
36432 case V2DI_FTYPE_V8DI_INT_V2DI_QI:
36433 case V4SF_FTYPE_V8SF_INT_V4SF_QI:
36434 case V4SI_FTYPE_V8SI_INT_V4SI_QI:
36435 case V8HI_FTYPE_V8SF_INT_V8HI_QI:
36436 case V8HI_FTYPE_V4SF_INT_V8HI_QI:
36437 case V32HI_FTYPE_V32HI_INT_V32HI_SI:
36438 case V16HI_FTYPE_V16HI_INT_V16HI_HI:
36439 case V8HI_FTYPE_V8HI_INT_V8HI_QI:
36440 case V4DI_FTYPE_V4DI_INT_V4DI_QI:
36441 case V2DI_FTYPE_V2DI_INT_V2DI_QI:
36442 case V8SI_FTYPE_V8SI_INT_V8SI_QI:
36443 case V4SI_FTYPE_V4SI_INT_V4SI_QI:
36444 case V4DF_FTYPE_V4DF_INT_V4DF_QI:
36445 case V2DF_FTYPE_V2DF_INT_V2DF_QI:
36446 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
36447 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
36448 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
36449 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
36450 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
36451 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
36452 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
36453 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
36454 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
36455 nargs = 4;
36456 mask_pos = 2;
36457 nargs_constant = 1;
36458 break;
36459 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
36460 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
36461 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
36462 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
36463 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
36464 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
36465 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
36466 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
36467 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
36468 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
36469 case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
36470 case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
36471 case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
36472 case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
36473 case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
36474 case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
36475 case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
36476 case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
36477 case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
36478 case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
36479 case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
36480 case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
36481 case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
36482 case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
36483 case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
36484 case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
36485 case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
36486 nargs = 5;
36487 mask_pos = 2;
36488 nargs_constant = 1;
36489 break;
36490 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
36491 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
36492 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
36493 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
36494 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
36495 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
36496 case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
36497 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
36498 case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
36499 case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
36500 case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
36501 nargs = 5;
36503 mask_pos = 1;
36504 nargs_constant = 1;
36505 break;
36507 default:
36508 gcc_unreachable ();
36511 gcc_assert (nargs <= ARRAY_SIZE (args));
36513 if (comparison != UNKNOWN)
36515 gcc_assert (nargs == 2);
36516 return ix86_expand_sse_compare (d, exp, target, swap);
36519 if (rmode == VOIDmode || rmode == tmode)
36521 if (optimize
36522 || target == 0
36523 || GET_MODE (target) != tmode
36524 || !insn_p->operand[0].predicate (target, tmode))
36525 target = gen_reg_rtx (tmode);
36526 real_target = target;
36528 else
36530 real_target = gen_reg_rtx (tmode);
36531 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
36534 for (i = 0; i < nargs; i++)
36536 tree arg = CALL_EXPR_ARG (exp, i);
36537 rtx op = expand_normal (arg);
36538 enum machine_mode mode = insn_p->operand[i + 1].mode;
36539 bool match = insn_p->operand[i + 1].predicate (op, mode);
36541 if (last_arg_count && (i + 1) == nargs)
36543 /* SIMD shift insns take either an 8-bit immediate or a
36544    register as the count.  But the builtin functions take an int
36545    as the count.  If the count doesn't match, we put it in a register.  */
36546 if (!match)
36548 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
36549 if (!insn_p->operand[i + 1].predicate (op, mode))
36550 op = copy_to_reg (op);
36553 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
36554      || (!mask_pos && (nargs - i) <= nargs_constant))
36556 if (!match)
36557 switch (icode)
36559 case CODE_FOR_avx_vinsertf128v4di:
36560 case CODE_FOR_avx_vextractf128v4di:
36561 error ("the last argument must be a 1-bit immediate");
36562 return const0_rtx;
36564 case CODE_FOR_avx512f_cmpv8di3_mask:
36565 case CODE_FOR_avx512f_cmpv16si3_mask:
36566 case CODE_FOR_avx512f_ucmpv8di3_mask:
36567 case CODE_FOR_avx512f_ucmpv16si3_mask:
36568 case CODE_FOR_avx512vl_cmpv4di3_mask:
36569 case CODE_FOR_avx512vl_cmpv8si3_mask:
36570 case CODE_FOR_avx512vl_ucmpv4di3_mask:
36571 case CODE_FOR_avx512vl_ucmpv8si3_mask:
36572 case CODE_FOR_avx512vl_cmpv2di3_mask:
36573 case CODE_FOR_avx512vl_cmpv4si3_mask:
36574 case CODE_FOR_avx512vl_ucmpv2di3_mask:
36575 case CODE_FOR_avx512vl_ucmpv4si3_mask:
36576 error ("the last argument must be a 3-bit immediate");
36577 return const0_rtx;
36579 case CODE_FOR_sse4_1_roundsd:
36580 case CODE_FOR_sse4_1_roundss:
36582 case CODE_FOR_sse4_1_roundpd:
36583 case CODE_FOR_sse4_1_roundps:
36584 case CODE_FOR_avx_roundpd256:
36585 case CODE_FOR_avx_roundps256:
36587 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
36588 case CODE_FOR_sse4_1_roundps_sfix:
36589 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
36590 case CODE_FOR_avx_roundps_sfix256:
36592 case CODE_FOR_sse4_1_blendps:
36593 case CODE_FOR_avx_blendpd256:
36594 case CODE_FOR_avx_vpermilv4df:
36595 case CODE_FOR_avx_vpermilv4df_mask:
36596 case CODE_FOR_avx512f_getmantv8df_mask:
36597 case CODE_FOR_avx512f_getmantv16sf_mask:
36598 case CODE_FOR_avx512vl_getmantv8sf_mask:
36599 case CODE_FOR_avx512vl_getmantv4df_mask:
36600 case CODE_FOR_avx512vl_getmantv4sf_mask:
36601 case CODE_FOR_avx512vl_getmantv2df_mask:
36602 case CODE_FOR_avx512dq_rangepv8df_mask_round:
36603 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
36604 case CODE_FOR_avx512dq_rangepv4df_mask:
36605 case CODE_FOR_avx512dq_rangepv8sf_mask:
36606 case CODE_FOR_avx512dq_rangepv2df_mask:
36607 case CODE_FOR_avx512dq_rangepv4sf_mask:
36608 case CODE_FOR_avx_shufpd256_mask:
36609 error ("the last argument must be a 4-bit immediate");
36610 return const0_rtx;
36612 case CODE_FOR_sha1rnds4:
36613 case CODE_FOR_sse4_1_blendpd:
36614 case CODE_FOR_avx_vpermilv2df:
36615 case CODE_FOR_avx_vpermilv2df_mask:
36616 case CODE_FOR_xop_vpermil2v2df3:
36617 case CODE_FOR_xop_vpermil2v4sf3:
36618 case CODE_FOR_xop_vpermil2v4df3:
36619 case CODE_FOR_xop_vpermil2v8sf3:
36620 case CODE_FOR_avx512f_vinsertf32x4_mask:
36621 case CODE_FOR_avx512f_vinserti32x4_mask:
36622 case CODE_FOR_avx512f_vextractf32x4_mask:
36623 case CODE_FOR_avx512f_vextracti32x4_mask:
36624 case CODE_FOR_sse2_shufpd:
36625 case CODE_FOR_sse2_shufpd_mask:
36626 case CODE_FOR_avx512dq_shuf_f64x2_mask:
36627 case CODE_FOR_avx512dq_shuf_i64x2_mask:
36628 case CODE_FOR_avx512vl_shuf_i32x4_mask:
36629 case CODE_FOR_avx512vl_shuf_f32x4_mask:
36630 error ("the last argument must be a 2-bit immediate");
36631 return const0_rtx;
36633 case CODE_FOR_avx_vextractf128v4df:
36634 case CODE_FOR_avx_vextractf128v8sf:
36635 case CODE_FOR_avx_vextractf128v8si:
36636 case CODE_FOR_avx_vinsertf128v4df:
36637 case CODE_FOR_avx_vinsertf128v8sf:
36638 case CODE_FOR_avx_vinsertf128v8si:
36639 case CODE_FOR_avx512f_vinsertf64x4_mask:
36640 case CODE_FOR_avx512f_vinserti64x4_mask:
36641 case CODE_FOR_avx512f_vextractf64x4_mask:
36642 case CODE_FOR_avx512f_vextracti64x4_mask:
36643 case CODE_FOR_avx512dq_vinsertf32x8_mask:
36644 case CODE_FOR_avx512dq_vinserti32x8_mask:
36645 case CODE_FOR_avx512vl_vinsertv4df:
36646 case CODE_FOR_avx512vl_vinsertv4di:
36647 case CODE_FOR_avx512vl_vinsertv8sf:
36648 case CODE_FOR_avx512vl_vinsertv8si:
36649 error ("the last argument must be a 1-bit immediate");
36650 return const0_rtx;
36652 case CODE_FOR_avx_vmcmpv2df3:
36653 case CODE_FOR_avx_vmcmpv4sf3:
36654 case CODE_FOR_avx_cmpv2df3:
36655 case CODE_FOR_avx_cmpv4sf3:
36656 case CODE_FOR_avx_cmpv4df3:
36657 case CODE_FOR_avx_cmpv8sf3:
36658 case CODE_FOR_avx512f_cmpv8df3_mask:
36659 case CODE_FOR_avx512f_cmpv16sf3_mask:
36660 case CODE_FOR_avx512f_vmcmpv2df3_mask:
36661 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
36662 error ("the last argument must be a 5-bit immediate");
36663 return const0_rtx;
36665 default:
36666 switch (nargs_constant)
36668 case 2:
36669 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
36670      || (!mask_pos && (nargs - i) == nargs_constant))
36672 error ("the next to last argument must be an 8-bit immediate");
36673 break;
36675 case 1:
36676 error ("the last argument must be an 8-bit immediate");
36677 break;
36678 default:
36679 gcc_unreachable ();
36681 return const0_rtx;
36684 else
36686 if (VECTOR_MODE_P (mode))
36687 op = safe_vector_operand (op, mode);
36689 /* If we aren't optimizing, only allow one memory operand to
36690 be generated. */
36691 if (memory_operand (op, mode))
36692 num_memory++;
36694 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
36696 if (optimize || !match || num_memory > 1)
36697 op = copy_to_mode_reg (mode, op);
36699 else
36701 op = copy_to_reg (op);
36702 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
36706 args[i].op = op;
36707 args[i].mode = mode;
36710 switch (nargs)
36712 case 1:
36713 pat = GEN_FCN (icode) (real_target, args[0].op);
36714 break;
36715 case 2:
36716 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
36717 break;
36718 case 3:
36719 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
36720 args[2].op);
36721 break;
36722 case 4:
36723 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
36724 args[2].op, args[3].op);
36725 break;
36726 case 5:
36727 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
36728      args[2].op, args[3].op, args[4].op);
      break;
36729 case 6:
36730 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
36731 args[2].op, args[3].op, args[4].op,
36732 args[5].op);
36733 break;
36734 default:
36735 gcc_unreachable ();
36738 if (! pat)
36739 return 0;
36741 emit_insn (pat);
36742 return target;
36745 /* Transform a pattern of the following layout:
36746    (parallel [
36747      set (A  B)
36748      (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
36750    into:
36751    (set (A B))
   or a pattern of the layout:
36754    (parallel [ A B
       ...
36756      (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
       ...  ])
36759    into:
36760    (parallel [ A B ... ])  */
36762 static rtx
36763 ix86_erase_embedded_rounding (rtx pat)
36765 if (GET_CODE (pat) == INSN)
36766 pat = PATTERN (pat);
36768 gcc_assert (GET_CODE (pat) == PARALLEL);
36770 if (XVECLEN (pat, 0) == 2)
36772 rtx p0 = XVECEXP (pat, 0, 0);
36773 rtx p1 = XVECEXP (pat, 0, 1);
36775 gcc_assert (GET_CODE (p0) == SET
36776 && GET_CODE (p1) == UNSPEC
36777 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
36779 return p0;
36781 else
36783 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
36784 int i = 0;
36785 int j = 0;
36787 for (; i < XVECLEN (pat, 0); ++i)
36789 rtx elem = XVECEXP (pat, 0, i);
36790 if (GET_CODE (elem) != UNSPEC
36791 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
36792 res [j++] = elem;
36795 /* No more than 1 occurrence was removed.  */
36796 gcc_assert (j >= XVECLEN (pat, 0) - 1);
36798 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
36802 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
36803 with rounding. */
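/* Illustrative sketch (an assumption, not part of the original sources):
   an intrinsic call such as

     int r = _mm_comi_round_sd (a, b, _CMP_GE_OQ, _MM_FROUND_NO_EXC);

   is expected to arrive here as a four-argument CALL_EXPR whose third
   operand indexes comi_comparisons[]/need_ucomi_values[] below and whose
   fourth operand is the SAE/rounding immediate checked against
   insn_p->operand[2].  */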
36804 static rtx
36805 ix86_expand_sse_comi_round (const struct builtin_description *d,
36806 tree exp, rtx target)
36808 rtx pat, set_dst;
36809 tree arg0 = CALL_EXPR_ARG (exp, 0);
36810 tree arg1 = CALL_EXPR_ARG (exp, 1);
36811 tree arg2 = CALL_EXPR_ARG (exp, 2);
36812 tree arg3 = CALL_EXPR_ARG (exp, 3);
36813 rtx op0 = expand_normal (arg0);
36814 rtx op1 = expand_normal (arg1);
36815 rtx op2 = expand_normal (arg2);
36816 rtx op3 = expand_normal (arg3);
36817 enum insn_code icode = d->icode;
36818 const struct insn_data_d *insn_p = &insn_data[icode];
36819 enum machine_mode mode0 = insn_p->operand[0].mode;
36820 enum machine_mode mode1 = insn_p->operand[1].mode;
36821 enum rtx_code comparison = UNEQ;
36822 bool need_ucomi = false;
36824 /* See avxintrin.h for values. */
36825 enum rtx_code comi_comparisons[32] =
36827 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
36828 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
36829 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
36831 bool need_ucomi_values[32] =
36833 true, false, false, true, true, false, false, true,
36834 true, false, false, true, true, false, false, true,
36835 false, true, true, false, false, true, true, false,
36836 false, true, true, false, false, true, true, false
36839 if (!CONST_INT_P (op2))
36841 error ("the third argument must be a comparison constant");
36842 return const0_rtx;
36844 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
36846 error ("incorrect comparison mode");
36847 return const0_rtx;
36850 if (!insn_p->operand[2].predicate (op3, SImode))
36852 error ("incorrect rounding operand");
36853 return const0_rtx;
36856 comparison = comi_comparisons[INTVAL (op2)];
36857 need_ucomi = need_ucomi_values[INTVAL (op2)];
36859 if (VECTOR_MODE_P (mode0))
36860 op0 = safe_vector_operand (op0, mode0);
36861 if (VECTOR_MODE_P (mode1))
36862 op1 = safe_vector_operand (op1, mode1);
36864 target = gen_reg_rtx (SImode);
36865 emit_move_insn (target, const0_rtx);
36866 target = gen_rtx_SUBREG (QImode, target, 0);
36868 if ((optimize && !register_operand (op0, mode0))
36869 || !insn_p->operand[0].predicate (op0, mode0))
36870 op0 = copy_to_mode_reg (mode0, op0);
36871 if ((optimize && !register_operand (op1, mode1))
36872 || !insn_p->operand[1].predicate (op1, mode1))
36873 op1 = copy_to_mode_reg (mode1, op1);
36875 if (need_ucomi)
36876 icode = icode == CODE_FOR_sse_comi_round
36877 ? CODE_FOR_sse_ucomi_round
36878 : CODE_FOR_sse2_ucomi_round;
36880 pat = GEN_FCN (icode) (op0, op1, op3);
36881 if (! pat)
36882 return 0;
36884 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
36885 if (INTVAL (op3) == NO_ROUND)
36887 pat = ix86_erase_embedded_rounding (pat);
36888 if (! pat)
36889 return 0;
36891 set_dst = SET_DEST (pat);
36893 else
36895 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
36896 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
36899 emit_insn (pat);
36900 emit_insn (gen_rtx_SET (VOIDmode,
36901 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36902 gen_rtx_fmt_ee (comparison, QImode,
36903 set_dst,
36904 const0_rtx)));
36906 return SUBREG_REG (target);
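/* Subroutine of ix86_expand_builtin to take care of insns whose last
   argument is an embedded rounding/SAE immediate.  (Descriptive comment
   added as a sketch of the expected contract, inferred from the body
   below.)  For example, a call such as

     __m512d r = _mm512_add_round_pd (a, b, _MM_FROUND_CUR_DIRECTION);

   is assumed to come through V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT with
   nargs == 5; because the rounding operand equals NO_ROUND, the embedded
   rounding unspec is stripped again via ix86_erase_embedded_rounding.  */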
36909 static rtx
36910 ix86_expand_round_builtin (const struct builtin_description *d,
36911 tree exp, rtx target)
36913 rtx pat;
36914 unsigned int i, nargs;
36915 struct
36917 rtx op;
36918 enum machine_mode mode;
36919 } args[6];
36920 enum insn_code icode = d->icode;
36921 const struct insn_data_d *insn_p = &insn_data[icode];
36922 enum machine_mode tmode = insn_p->operand[0].mode;
36923 unsigned int nargs_constant = 0;
36924 unsigned int redundant_embed_rnd = 0;
36926 switch ((enum ix86_builtin_func_type) d->flag)
36928 case UINT64_FTYPE_V2DF_INT:
36929 case UINT64_FTYPE_V4SF_INT:
36930 case UINT_FTYPE_V2DF_INT:
36931 case UINT_FTYPE_V4SF_INT:
36932 case INT64_FTYPE_V2DF_INT:
36933 case INT64_FTYPE_V4SF_INT:
36934 case INT_FTYPE_V2DF_INT:
36935 case INT_FTYPE_V4SF_INT:
36936 nargs = 2;
36937 break;
36938 case V4SF_FTYPE_V4SF_UINT_INT:
36939 case V4SF_FTYPE_V4SF_UINT64_INT:
36940 case V2DF_FTYPE_V2DF_UINT64_INT:
36941 case V4SF_FTYPE_V4SF_INT_INT:
36942 case V4SF_FTYPE_V4SF_INT64_INT:
36943 case V2DF_FTYPE_V2DF_INT64_INT:
36944 case V4SF_FTYPE_V4SF_V4SF_INT:
36945 case V2DF_FTYPE_V2DF_V2DF_INT:
36946 case V4SF_FTYPE_V4SF_V2DF_INT:
36947 case V2DF_FTYPE_V2DF_V4SF_INT:
36948 nargs = 3;
36949 break;
36950 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
36951 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
36952 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
36953 case V8DI_FTYPE_V8DF_V8DI_QI_INT:
36954 case V8SF_FTYPE_V8DI_V8SF_QI_INT:
36955 case V8DF_FTYPE_V8DI_V8DF_QI_INT:
36956 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
36957 case V8DI_FTYPE_V8SF_V8DI_QI_INT:
36958 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
36959 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
36960 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
36961 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
36962 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
36963 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
36964 nargs = 4;
36965 break;
36966 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
36967 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
36968 nargs_constant = 2;
36969 nargs = 4;
36970 break;
36971 case INT_FTYPE_V4SF_V4SF_INT_INT:
36972 case INT_FTYPE_V2DF_V2DF_INT_INT:
36973 return ix86_expand_sse_comi_round (d, exp, target);
36974 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
36975 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
36976 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
36977 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
36978 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
36979 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
36980 nargs = 5;
36981 break;
36982 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
36983 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
36984 nargs_constant = 4;
36985 nargs = 5;
36986 break;
36987 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
36988 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
36989 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
36990 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
36991 nargs_constant = 3;
36992 nargs = 5;
36993 break;
36994 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
36995 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
36996 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
36997 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
36998 nargs = 6;
36999 nargs_constant = 4;
37000 break;
37001 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
37002 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
37003 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
37004 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
37005 nargs = 6;
37006 nargs_constant = 3;
37007 break;
37008 default:
37009 gcc_unreachable ();
37011 gcc_assert (nargs <= ARRAY_SIZE (args));
37013 if (optimize
37014 || target == 0
37015 || GET_MODE (target) != tmode
37016 || !insn_p->operand[0].predicate (target, tmode))
37017 target = gen_reg_rtx (tmode);
37019 for (i = 0; i < nargs; i++)
37021 tree arg = CALL_EXPR_ARG (exp, i);
37022 rtx op = expand_normal (arg);
37023 enum machine_mode mode = insn_p->operand[i + 1].mode;
37024 bool match = insn_p->operand[i + 1].predicate (op, mode);
37026 if (i == nargs - nargs_constant)
37028 if (!match)
37030 switch (icode)
37032 case CODE_FOR_avx512f_getmantv8df_mask_round:
37033 case CODE_FOR_avx512f_getmantv16sf_mask_round:
37034 case CODE_FOR_avx512f_vgetmantv2df_round:
37035 case CODE_FOR_avx512f_vgetmantv4sf_round:
37036 error ("the immediate argument must be a 4-bit immediate");
37037 return const0_rtx;
37038 case CODE_FOR_avx512f_cmpv8df3_mask_round:
37039 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
37040 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
37041 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
37042 error ("the immediate argument must be a 5-bit immediate");
37043 return const0_rtx;
37044 default:
37045 error ("the immediate argument must be an 8-bit immediate");
37046 return const0_rtx;
37050 else if (i == nargs - 1)
37052 if (!insn_p->operand[nargs].predicate (op, SImode))
37054 error ("incorrect rounding operand");
37055 return const0_rtx;
37058 /* If there is no rounding use normal version of the pattern. */
37059 if (INTVAL (op) == NO_ROUND)
37060 redundant_embed_rnd = 1;
37062 else
37064 if (VECTOR_MODE_P (mode))
37065 op = safe_vector_operand (op, mode);
37067 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37069 if (optimize || !match)
37070 op = copy_to_mode_reg (mode, op);
37072 else
37074 op = copy_to_reg (op);
37075 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37079 args[i].op = op;
37080 args[i].mode = mode;
37083 switch (nargs)
37085 case 1:
37086 pat = GEN_FCN (icode) (target, args[0].op);
37087 break;
37088 case 2:
37089 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37090 break;
37091 case 3:
37092 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37093 args[2].op);
37094 break;
37095 case 4:
37096 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37097 args[2].op, args[3].op);
37098 break;
37099 case 5:
37100 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37101      args[2].op, args[3].op, args[4].op);
      break;
37102 case 6:
37103 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
37104 args[2].op, args[3].op, args[4].op,
37105 args[5].op);
37106 break;
37107 default:
37108 gcc_unreachable ();
37111 if (!pat)
37112 return 0;
37114 if (redundant_embed_rnd)
37115 pat = ix86_erase_embedded_rounding (pat);
37117 emit_insn (pat);
37118 return target;
37121 /* Subroutine of ix86_expand_builtin to take care of special insns
37122 with variable number of operands. */
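/* Illustrative sketch (an assumption, not part of the original sources):
   a masked load such as

     __m512 v = _mm512_mask_load_ps (w, m, p);

   is expected to be classified below as klass == load, nargs == 3 with
   d->flag == V16SF_FTYPE_PCV16SF_V16SF_HI; its icode
   CODE_FOR_avx512f_loadv16sf_mask sets aligned_mem, so the MEM built for
   P is given GET_MODE_ALIGNMENT (V16SFmode), i.e. 64-byte alignment.  */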
37124 static rtx
37125 ix86_expand_special_args_builtin (const struct builtin_description *d,
37126 tree exp, rtx target)
37128 tree arg;
37129 rtx pat, op;
37130 unsigned int i, nargs, arg_adjust, memory;
37131 bool aligned_mem = false;
37132 struct
37134 rtx op;
37135 enum machine_mode mode;
37136 } args[3];
37137 enum insn_code icode = d->icode;
37138 bool last_arg_constant = false;
37139 const struct insn_data_d *insn_p = &insn_data[icode];
37140 enum machine_mode tmode = insn_p->operand[0].mode;
37141 enum { load, store } klass;
37143 switch ((enum ix86_builtin_func_type) d->flag)
37145 case VOID_FTYPE_VOID:
37146 emit_insn (GEN_FCN (icode) (target));
37147 return 0;
37148 case VOID_FTYPE_UINT64:
37149 case VOID_FTYPE_UNSIGNED:
37150 nargs = 0;
37151 klass = store;
37152 memory = 0;
37153 break;
37155 case INT_FTYPE_VOID:
37156 case USHORT_FTYPE_VOID:
37157 case UINT64_FTYPE_VOID:
37158 case UNSIGNED_FTYPE_VOID:
37159 nargs = 0;
37160 klass = load;
37161 memory = 0;
37162 break;
37163 case UINT64_FTYPE_PUNSIGNED:
37164 case V2DI_FTYPE_PV2DI:
37165 case V4DI_FTYPE_PV4DI:
37166 case V32QI_FTYPE_PCCHAR:
37167 case V16QI_FTYPE_PCCHAR:
37168 case V8SF_FTYPE_PCV4SF:
37169 case V8SF_FTYPE_PCFLOAT:
37170 case V4SF_FTYPE_PCFLOAT:
37171 case V4DF_FTYPE_PCV2DF:
37172 case V4DF_FTYPE_PCDOUBLE:
37173 case V2DF_FTYPE_PCDOUBLE:
37174 case VOID_FTYPE_PVOID:
37175 case V16SI_FTYPE_PV4SI:
37176 case V16SF_FTYPE_PV4SF:
37177 case V8DI_FTYPE_PV4DI:
37178 case V8DI_FTYPE_PV8DI:
37179 case V8DF_FTYPE_PV4DF:
37180 nargs = 1;
37181 klass = load;
37182 memory = 0;
37183 switch (icode)
37185 case CODE_FOR_sse4_1_movntdqa:
37186 case CODE_FOR_avx2_movntdqa:
37187 case CODE_FOR_avx512dq_movntdqa:
37188 aligned_mem = true;
37189 break;
37190 default:
37191 break;
37193 break;
37194 case VOID_FTYPE_PV2SF_V4SF:
37195 case VOID_FTYPE_PV8DI_V8DI:
37196 case VOID_FTYPE_PV4DI_V4DI:
37197 case VOID_FTYPE_PV2DI_V2DI:
37198 case VOID_FTYPE_PCHAR_V32QI:
37199 case VOID_FTYPE_PCHAR_V16QI:
37200 case VOID_FTYPE_PFLOAT_V16SF:
37201 case VOID_FTYPE_PFLOAT_V8SF:
37202 case VOID_FTYPE_PFLOAT_V4SF:
37203 case VOID_FTYPE_PDOUBLE_V8DF:
37204 case VOID_FTYPE_PDOUBLE_V4DF:
37205 case VOID_FTYPE_PDOUBLE_V2DF:
37206 case VOID_FTYPE_PLONGLONG_LONGLONG:
37207 case VOID_FTYPE_PULONGLONG_ULONGLONG:
37208 case VOID_FTYPE_PINT_INT:
37209 nargs = 1;
37210 klass = store;
37211 /* Reserve memory operand for target. */
37212 memory = ARRAY_SIZE (args);
37213 switch (icode)
37215 /* These builtins and instructions require the memory
37216 to be properly aligned. */
37217 case CODE_FOR_avx_movntv4di:
37218 case CODE_FOR_sse2_movntv2di:
37219 case CODE_FOR_avx_movntv8sf:
37220 case CODE_FOR_sse_movntv4sf:
37221 case CODE_FOR_sse4a_vmmovntv4sf:
37222 case CODE_FOR_avx_movntv4df:
37223 case CODE_FOR_sse2_movntv2df:
37224 case CODE_FOR_sse4a_vmmovntv2df:
37225 case CODE_FOR_sse2_movntidi:
37226 case CODE_FOR_sse_movntq:
37227 case CODE_FOR_sse2_movntisi:
37228 case CODE_FOR_avx512f_movntv16sf:
37229 case CODE_FOR_avx512f_movntv8df:
37230 case CODE_FOR_avx512f_movntv8di:
37231 aligned_mem = true;
37232 break;
37233 default:
37234 break;
37236 break;
37237 case V4SF_FTYPE_V4SF_PCV2SF:
37238 case V2DF_FTYPE_V2DF_PCDOUBLE:
37239 nargs = 2;
37240 klass = load;
37241 memory = 1;
37242 break;
37243 case V8SF_FTYPE_PCV8SF_V8SI:
37244 case V4DF_FTYPE_PCV4DF_V4DI:
37245 case V4SF_FTYPE_PCV4SF_V4SI:
37246 case V2DF_FTYPE_PCV2DF_V2DI:
37247 case V8SI_FTYPE_PCV8SI_V8SI:
37248 case V4DI_FTYPE_PCV4DI_V4DI:
37249 case V4SI_FTYPE_PCV4SI_V4SI:
37250 case V2DI_FTYPE_PCV2DI_V2DI:
37251 nargs = 2;
37252 klass = load;
37253 memory = 0;
37254 break;
37255 case VOID_FTYPE_PV8DF_V8DF_QI:
37256 case VOID_FTYPE_PV16SF_V16SF_HI:
37257 case VOID_FTYPE_PV8DI_V8DI_QI:
37258 case VOID_FTYPE_PV4DI_V4DI_QI:
37259 case VOID_FTYPE_PV2DI_V2DI_QI:
37260 case VOID_FTYPE_PV16SI_V16SI_HI:
37261 case VOID_FTYPE_PV8SI_V8SI_QI:
37262 case VOID_FTYPE_PV4SI_V4SI_QI:
37263 switch (icode)
37265 /* These builtins and instructions require the memory
37266 to be properly aligned. */
37267 case CODE_FOR_avx512f_storev16sf_mask:
37268 case CODE_FOR_avx512f_storev16si_mask:
37269 case CODE_FOR_avx512f_storev8df_mask:
37270 case CODE_FOR_avx512f_storev8di_mask:
37271 case CODE_FOR_avx512vl_storev8sf_mask:
37272 case CODE_FOR_avx512vl_storev8si_mask:
37273 case CODE_FOR_avx512vl_storev4df_mask:
37274 case CODE_FOR_avx512vl_storev4di_mask:
37275 case CODE_FOR_avx512vl_storev4sf_mask:
37276 case CODE_FOR_avx512vl_storev4si_mask:
37277 case CODE_FOR_avx512vl_storev2df_mask:
37278 case CODE_FOR_avx512vl_storev2di_mask:
37279 aligned_mem = true;
37280 break;
37281 default:
37282 break;
37284 /* FALLTHRU */
37285 case VOID_FTYPE_PV8SF_V8SI_V8SF:
37286 case VOID_FTYPE_PV4DF_V4DI_V4DF:
37287 case VOID_FTYPE_PV4SF_V4SI_V4SF:
37288 case VOID_FTYPE_PV2DF_V2DI_V2DF:
37289 case VOID_FTYPE_PV8SI_V8SI_V8SI:
37290 case VOID_FTYPE_PV4DI_V4DI_V4DI:
37291 case VOID_FTYPE_PV4SI_V4SI_V4SI:
37292 case VOID_FTYPE_PV2DI_V2DI_V2DI:
37293 case VOID_FTYPE_PDOUBLE_V2DF_QI:
37294 case VOID_FTYPE_PFLOAT_V4SF_QI:
37295 case VOID_FTYPE_PV8SI_V8DI_QI:
37296 case VOID_FTYPE_PV8HI_V8DI_QI:
37297 case VOID_FTYPE_PV16HI_V16SI_HI:
37298 case VOID_FTYPE_PV16QI_V8DI_QI:
37299 case VOID_FTYPE_PV16QI_V16SI_HI:
37300 case VOID_FTYPE_PV4SI_V4DI_QI:
37301 case VOID_FTYPE_PV4SI_V2DI_QI:
37302 case VOID_FTYPE_PV8HI_V4DI_QI:
37303 case VOID_FTYPE_PV8HI_V2DI_QI:
37304 case VOID_FTYPE_PV8HI_V8SI_QI:
37305 case VOID_FTYPE_PV8HI_V4SI_QI:
37306 case VOID_FTYPE_PV16QI_V4DI_QI:
37307 case VOID_FTYPE_PV16QI_V2DI_QI:
37308 case VOID_FTYPE_PV16QI_V8SI_QI:
37309 case VOID_FTYPE_PV16QI_V4SI_QI:
37310 case VOID_FTYPE_PV8HI_V8HI_QI:
37311 case VOID_FTYPE_PV16HI_V16HI_HI:
37312 case VOID_FTYPE_PV32HI_V32HI_SI:
37313 case VOID_FTYPE_PV16QI_V16QI_HI:
37314 case VOID_FTYPE_PV32QI_V32QI_SI:
37315 case VOID_FTYPE_PV64QI_V64QI_DI:
37316 case VOID_FTYPE_PV4DF_V4DF_QI:
37317 case VOID_FTYPE_PV2DF_V2DF_QI:
37318 case VOID_FTYPE_PV8SF_V8SF_QI:
37319 case VOID_FTYPE_PV4SF_V4SF_QI:
37320 nargs = 2;
37321 klass = store;
37322 /* Reserve memory operand for target. */
37323 memory = ARRAY_SIZE (args);
37324 break;
37325 case V4SF_FTYPE_PCV4SF_V4SF_QI:
37326 case V8SF_FTYPE_PCV8SF_V8SF_QI:
37327 case V16SF_FTYPE_PCV16SF_V16SF_HI:
37328 case V4SI_FTYPE_PCV4SI_V4SI_QI:
37329 case V8SI_FTYPE_PCV8SI_V8SI_QI:
37330 case V16SI_FTYPE_PCV16SI_V16SI_HI:
37331 case V2DF_FTYPE_PCV2DF_V2DF_QI:
37332 case V4DF_FTYPE_PCV4DF_V4DF_QI:
37333 case V8DF_FTYPE_PCV8DF_V8DF_QI:
37334 case V2DI_FTYPE_PCV2DI_V2DI_QI:
37335 case V4DI_FTYPE_PCV4DI_V4DI_QI:
37336 case V8DI_FTYPE_PCV8DI_V8DI_QI:
37337 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
37338 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
37339 case V8HI_FTYPE_PCV8HI_V8HI_QI:
37340 case V16HI_FTYPE_PCV16HI_V16HI_HI:
37341 case V32HI_FTYPE_PCV32HI_V32HI_SI:
37342 case V16QI_FTYPE_PCV16QI_V16QI_HI:
37343 case V32QI_FTYPE_PCV32QI_V32QI_SI:
37344 case V64QI_FTYPE_PCV64QI_V64QI_DI:
37345 nargs = 3;
37346 klass = load;
37347 memory = 0;
37348 switch (icode)
37350 /* These builtins and instructions require the memory
37351 to be properly aligned. */
37352 case CODE_FOR_avx512f_loadv16sf_mask:
37353 case CODE_FOR_avx512f_loadv16si_mask:
37354 case CODE_FOR_avx512f_loadv8df_mask:
37355 case CODE_FOR_avx512f_loadv8di_mask:
37356 case CODE_FOR_avx512vl_loadv8sf_mask:
37357 case CODE_FOR_avx512vl_loadv8si_mask:
37358 case CODE_FOR_avx512vl_loadv4df_mask:
37359 case CODE_FOR_avx512vl_loadv4di_mask:
37360 case CODE_FOR_avx512vl_loadv4sf_mask:
37361 case CODE_FOR_avx512vl_loadv4si_mask:
37362 case CODE_FOR_avx512vl_loadv2df_mask:
37363 case CODE_FOR_avx512vl_loadv2di_mask:
37364 case CODE_FOR_avx512bw_loadv64qi_mask:
37365 case CODE_FOR_avx512vl_loadv32qi_mask:
37366 case CODE_FOR_avx512vl_loadv16qi_mask:
37367 case CODE_FOR_avx512bw_loadv32hi_mask:
37368 case CODE_FOR_avx512vl_loadv16hi_mask:
37369 case CODE_FOR_avx512vl_loadv8hi_mask:
37370 aligned_mem = true;
37371 break;
37372 default:
37373 break;
37375 break;
37376 case VOID_FTYPE_UINT_UINT_UINT:
37377 case VOID_FTYPE_UINT64_UINT_UINT:
37378 case UCHAR_FTYPE_UINT_UINT_UINT:
37379 case UCHAR_FTYPE_UINT64_UINT_UINT:
37380 nargs = 3;
37381 klass = load;
37382 memory = ARRAY_SIZE (args);
37383 last_arg_constant = true;
37384 break;
37385 default:
37386 gcc_unreachable ();
37389 gcc_assert (nargs <= ARRAY_SIZE (args));
37391 if (klass == store)
37393 arg = CALL_EXPR_ARG (exp, 0);
37394 op = expand_normal (arg);
37395 gcc_assert (target == 0);
37396 if (memory)
37398 op = ix86_zero_extend_to_Pmode (op);
37399 target = gen_rtx_MEM (tmode, op);
37400 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
37401 on it. Try to improve it using get_pointer_alignment,
37402 and if the special builtin is one that requires strict
37403 mode alignment, also from its GET_MODE_ALIGNMENT.
37404 Failure to do so could lead to ix86_legitimate_combined_insn
37405 rejecting all changes to such insns. */
37406 unsigned int align = get_pointer_alignment (arg);
37407 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
37408 align = GET_MODE_ALIGNMENT (tmode);
37409 if (MEM_ALIGN (target) < align)
37410 set_mem_align (target, align);
37412 else
37413 target = force_reg (tmode, op);
37414 arg_adjust = 1;
37416 else
37418 arg_adjust = 0;
37419 if (optimize
37420 || target == 0
37421 || !register_operand (target, tmode)
37422 || GET_MODE (target) != tmode)
37423 target = gen_reg_rtx (tmode);
37426 for (i = 0; i < nargs; i++)
37428 enum machine_mode mode = insn_p->operand[i + 1].mode;
37429 bool match;
37431 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
37432 op = expand_normal (arg);
37433 match = insn_p->operand[i + 1].predicate (op, mode);
37435 if (last_arg_constant && (i + 1) == nargs)
37437 if (!match)
37439 if (icode == CODE_FOR_lwp_lwpvalsi3
37440 || icode == CODE_FOR_lwp_lwpinssi3
37441 || icode == CODE_FOR_lwp_lwpvaldi3
37442 || icode == CODE_FOR_lwp_lwpinsdi3)
37443 error ("the last argument must be a 32-bit immediate");
37444 else
37445 error ("the last argument must be an 8-bit immediate");
37446 return const0_rtx;
37449 else
37451 if (i == memory)
37453 /* This must be the memory operand. */
37454 op = ix86_zero_extend_to_Pmode (op);
37455 op = gen_rtx_MEM (mode, op);
37456 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
37457 on it. Try to improve it using get_pointer_alignment,
37458 and if the special builtin is one that requires strict
37459 mode alignment, also from its GET_MODE_ALIGNMENT.
37460 Failure to do so could lead to ix86_legitimate_combined_insn
37461 rejecting all changes to such insns. */
37462 unsigned int align = get_pointer_alignment (arg);
37463 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
37464 align = GET_MODE_ALIGNMENT (mode);
37465 if (MEM_ALIGN (op) < align)
37466 set_mem_align (op, align);
37468 else
37470 /* This must be a register.  */
37471 if (VECTOR_MODE_P (mode))
37472 op = safe_vector_operand (op, mode);
37474 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37475 op = copy_to_mode_reg (mode, op);
37476 else
37478 op = copy_to_reg (op);
37479 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37484 args[i].op = op;
37485 args[i].mode = mode;
37488 switch (nargs)
37490 case 0:
37491 pat = GEN_FCN (icode) (target);
37492 break;
37493 case 1:
37494 pat = GEN_FCN (icode) (target, args[0].op);
37495 break;
37496 case 2:
37497 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
37498 break;
37499 case 3:
37500 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
37501 break;
37502 default:
37503 gcc_unreachable ();
37506 if (! pat)
37507 return 0;
37508 emit_insn (pat);
37509 return klass == store ? 0 : target;
37512 /* Return the integer constant in ARG. Constrain it to be in the range
37513 of the subparts of VEC_TYPE; issue an error if not. */
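/* Illustrative note (derived from the check below, shown here only as an
   example): for a V4SF vector type TYPE_VECTOR_SUBPARTS is 4, so a call
   like __builtin_ia32_vec_ext_v4sf (x, 4) is diagnosed with
   "selector must be an integer constant in the range 0..3" and element 0
   is used instead.  */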
37515 static int
37516 get_element_number (tree vec_type, tree arg)
37518 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
37520 if (!tree_fits_uhwi_p (arg)
37521 || (elt = tree_to_uhwi (arg), elt > max))
37523 error ("selector must be an integer constant in the range 0..%wi", max);
37524 return 0;
37527 return elt;
37530 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
37531 ix86_expand_vector_init. We DO have language-level syntax for this, in
37532 the form of (type){ init-list }. Except that since we can't place emms
37533 instructions from inside the compiler, we can't allow the use of MMX
37534 registers unless the user explicitly asks for it. So we do *not* define
37535 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
37536 we have builtins invoked by mmintrin.h that give us license to emit
37537 these sorts of instructions. */
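/* Illustrative sketch (an assumption about the mmintrin.h mapping): a call
   such as

     __m64 v = _mm_set_pi32 (hi, lo);

   is expected to reach this expander via __builtin_ia32_vec_init_v2si and
   be lowered through ix86_expand_vector_init on a V2SImode target.  */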
37539 static rtx
37540 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
37542 enum machine_mode tmode = TYPE_MODE (type);
37543 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
37544 int i, n_elt = GET_MODE_NUNITS (tmode);
37545 rtvec v = rtvec_alloc (n_elt);
37547 gcc_assert (VECTOR_MODE_P (tmode));
37548 gcc_assert (call_expr_nargs (exp) == n_elt);
37550 for (i = 0; i < n_elt; ++i)
37552 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
37553 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
37556 if (!target || !register_operand (target, tmode))
37557 target = gen_reg_rtx (tmode);
37559 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
37560 return target;
37563 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
37564 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
37565 had a language-level syntax for referencing vector elements. */
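/* Illustrative sketch (an assumption about the emmintrin.h mapping):

     int e = _mm_extract_epi16 (v, 3);

   is expected to come through __builtin_ia32_vec_ext_v8hi, with
   get_element_number checking the selector against V8HI's eight
   subparts before ix86_expand_vector_extract is invoked.  */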
37567 static rtx
37568 ix86_expand_vec_ext_builtin (tree exp, rtx target)
37570 enum machine_mode tmode, mode0;
37571 tree arg0, arg1;
37572 int elt;
37573 rtx op0;
37575 arg0 = CALL_EXPR_ARG (exp, 0);
37576 arg1 = CALL_EXPR_ARG (exp, 1);
37578 op0 = expand_normal (arg0);
37579 elt = get_element_number (TREE_TYPE (arg0), arg1);
37581 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
37582 mode0 = TYPE_MODE (TREE_TYPE (arg0));
37583 gcc_assert (VECTOR_MODE_P (mode0));
37585 op0 = force_reg (mode0, op0);
37587 if (optimize || !target || !register_operand (target, tmode))
37588 target = gen_reg_rtx (tmode);
37590 ix86_expand_vector_extract (true, target, op0, elt);
37592 return target;
37595 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
37596 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
37597 a language-level syntax for referencing vector elements. */
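/* Illustrative sketch (an assumption about the emmintrin.h mapping):

     __m128i r = _mm_insert_epi16 (v, x, 3);

   is expected to come through __builtin_ia32_vec_set_v8hi; note that the
   expander below copies the source vector into a fresh register first, so
   the original operand is left unmodified.  */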
37599 static rtx
37600 ix86_expand_vec_set_builtin (tree exp)
37602 enum machine_mode tmode, mode1;
37603 tree arg0, arg1, arg2;
37604 int elt;
37605 rtx op0, op1, target;
37607 arg0 = CALL_EXPR_ARG (exp, 0);
37608 arg1 = CALL_EXPR_ARG (exp, 1);
37609 arg2 = CALL_EXPR_ARG (exp, 2);
37611 tmode = TYPE_MODE (TREE_TYPE (arg0));
37612 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
37613 gcc_assert (VECTOR_MODE_P (tmode));
37615 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
37616 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
37617 elt = get_element_number (TREE_TYPE (arg0), arg2);
37619 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
37620 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
37622 op0 = force_reg (tmode, op0);
37623 op1 = force_reg (mode1, op1);
37625 /* OP0 is the source of these builtin functions and shouldn't be
37626 modified. Create a copy, use it and return it as target. */
37627 target = gen_reg_rtx (tmode);
37628 emit_move_insn (target, op0);
37629 ix86_expand_vector_set (true, target, op1, elt);
37631 return target;
37634 /* Expand an expression EXP that calls a built-in function,
37635 with result going to TARGET if that's convenient
37636 (and in mode MODE if that's convenient).
37637 SUBTARGET may be used as the target for computing one of EXP's operands.
37638 IGNORE is nonzero if the value is to be ignored. */
37640 static rtx
37641 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
37642 enum machine_mode mode, int ignore)
37644 const struct builtin_description *d;
37645 size_t i;
37646 enum insn_code icode;
37647 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
37648 tree arg0, arg1, arg2, arg3, arg4;
37649 rtx op0, op1, op2, op3, op4, pat, insn;
37650 enum machine_mode mode0, mode1, mode2, mode3, mode4;
37651 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
37653 /* For CPU builtins that can be folded, fold first and expand the fold. */
37654 switch (fcode)
37656 case IX86_BUILTIN_CPU_INIT:
37658 /* Make it call __cpu_indicator_init in libgcc. */
37659 tree call_expr, fndecl, type;
37660 type = build_function_type_list (integer_type_node, NULL_TREE);
37661 fndecl = build_fn_decl ("__cpu_indicator_init", type);
37662 call_expr = build_call_expr (fndecl, 0);
37663 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
37665 case IX86_BUILTIN_CPU_IS:
37666 case IX86_BUILTIN_CPU_SUPPORTS:
37668 tree arg0 = CALL_EXPR_ARG (exp, 0);
37669 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
37670 gcc_assert (fold_expr != NULL_TREE);
37671 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
37675 /* Determine whether the builtin function is available under the current ISA.
37676 Originally the builtin was not created if it wasn't applicable to the
37677 current ISA based on the command line switches. With function specific
37678 options, we need to check in the context of the function making the call
37679 whether it is supported. */
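/* Illustrative note (an assumption, shown only as an example of the check
   below): a direct call to, say, __builtin_ia32_pandq512_mask from a
   function whose effective ISA does not include AVX-512F is expected to be
   rejected here with an error of the form "... needs isa option -mavx512f"
   instead of being expanded.  */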
37680 if (ix86_builtins_isa[fcode].isa
37681 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
37683 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
37684 NULL, (enum fpmath_unit) 0, false);
37686 if (!opts)
37687 error ("%qE needs unknown isa option", fndecl);
37688 else
37690 gcc_assert (opts != NULL);
37691 error ("%qE needs isa option %s", fndecl, opts);
37692 free (opts);
37694 return const0_rtx;
37697 switch (fcode)
37699 case IX86_BUILTIN_MASKMOVQ:
37700 case IX86_BUILTIN_MASKMOVDQU:
37701 icode = (fcode == IX86_BUILTIN_MASKMOVQ
37702 ? CODE_FOR_mmx_maskmovq
37703 : CODE_FOR_sse2_maskmovdqu);
37704 /* Note the arg order is different from the operand order. */
37705 arg1 = CALL_EXPR_ARG (exp, 0);
37706 arg2 = CALL_EXPR_ARG (exp, 1);
37707 arg0 = CALL_EXPR_ARG (exp, 2);
37708 op0 = expand_normal (arg0);
37709 op1 = expand_normal (arg1);
37710 op2 = expand_normal (arg2);
37711 mode0 = insn_data[icode].operand[0].mode;
37712 mode1 = insn_data[icode].operand[1].mode;
37713 mode2 = insn_data[icode].operand[2].mode;
37715 op0 = ix86_zero_extend_to_Pmode (op0);
37716 op0 = gen_rtx_MEM (mode1, op0);
37718 if (!insn_data[icode].operand[0].predicate (op0, mode0))
37719 op0 = copy_to_mode_reg (mode0, op0);
37720 if (!insn_data[icode].operand[1].predicate (op1, mode1))
37721 op1 = copy_to_mode_reg (mode1, op1);
37722 if (!insn_data[icode].operand[2].predicate (op2, mode2))
37723 op2 = copy_to_mode_reg (mode2, op2);
37724 pat = GEN_FCN (icode) (op0, op1, op2);
37725 if (! pat)
37726 return 0;
37727 emit_insn (pat);
37728 return 0;
37730 case IX86_BUILTIN_LDMXCSR:
37731 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
37732 target = assign_386_stack_local (SImode, SLOT_TEMP);
37733 emit_move_insn (target, op0);
37734 emit_insn (gen_sse_ldmxcsr (target));
37735 return 0;
37737 case IX86_BUILTIN_STMXCSR:
37738 target = assign_386_stack_local (SImode, SLOT_TEMP);
37739 emit_insn (gen_sse_stmxcsr (target));
37740 return copy_to_mode_reg (SImode, target);
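/* Illustrative note (an assumption about the xmmintrin.h mapping):
   _mm_setcsr and _mm_getcsr are expected to map to these two builtins;
   both go through a SImode stack slot (SLOT_TEMP) because the ldmxcsr and
   stmxcsr patterns only accept a memory operand.  */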
37742 case IX86_BUILTIN_CLFLUSH:
37743 arg0 = CALL_EXPR_ARG (exp, 0);
37744 op0 = expand_normal (arg0);
37745 icode = CODE_FOR_sse2_clflush;
37746 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
37747 op0 = ix86_zero_extend_to_Pmode (op0);
37749 emit_insn (gen_sse2_clflush (op0));
37750 return 0;
37752 case IX86_BUILTIN_CLFLUSHOPT:
37753 arg0 = CALL_EXPR_ARG (exp, 0);
37754 op0 = expand_normal (arg0);
37755 icode = CODE_FOR_clflushopt;
37756 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
37757 op0 = ix86_zero_extend_to_Pmode (op0);
37759 emit_insn (gen_clflushopt (op0));
37760 return 0;
37762 case IX86_BUILTIN_MONITOR:
37763 arg0 = CALL_EXPR_ARG (exp, 0);
37764 arg1 = CALL_EXPR_ARG (exp, 1);
37765 arg2 = CALL_EXPR_ARG (exp, 2);
37766 op0 = expand_normal (arg0);
37767 op1 = expand_normal (arg1);
37768 op2 = expand_normal (arg2);
37769 if (!REG_P (op0))
37770 op0 = ix86_zero_extend_to_Pmode (op0);
37771 if (!REG_P (op1))
37772 op1 = copy_to_mode_reg (SImode, op1);
37773 if (!REG_P (op2))
37774 op2 = copy_to_mode_reg (SImode, op2);
37775 emit_insn (ix86_gen_monitor (op0, op1, op2));
37776 return 0;
37778 case IX86_BUILTIN_MWAIT:
37779 arg0 = CALL_EXPR_ARG (exp, 0);
37780 arg1 = CALL_EXPR_ARG (exp, 1);
37781 op0 = expand_normal (arg0);
37782 op1 = expand_normal (arg1);
37783 if (!REG_P (op0))
37784 op0 = copy_to_mode_reg (SImode, op0);
37785 if (!REG_P (op1))
37786 op1 = copy_to_mode_reg (SImode, op1);
37787 emit_insn (gen_sse3_mwait (op0, op1));
37788 return 0;
37790 case IX86_BUILTIN_VEC_INIT_V2SI:
37791 case IX86_BUILTIN_VEC_INIT_V4HI:
37792 case IX86_BUILTIN_VEC_INIT_V8QI:
37793 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
37795 case IX86_BUILTIN_VEC_EXT_V2DF:
37796 case IX86_BUILTIN_VEC_EXT_V2DI:
37797 case IX86_BUILTIN_VEC_EXT_V4SF:
37798 case IX86_BUILTIN_VEC_EXT_V4SI:
37799 case IX86_BUILTIN_VEC_EXT_V8HI:
37800 case IX86_BUILTIN_VEC_EXT_V2SI:
37801 case IX86_BUILTIN_VEC_EXT_V4HI:
37802 case IX86_BUILTIN_VEC_EXT_V16QI:
37803 return ix86_expand_vec_ext_builtin (exp, target);
37805 case IX86_BUILTIN_VEC_SET_V2DI:
37806 case IX86_BUILTIN_VEC_SET_V4SF:
37807 case IX86_BUILTIN_VEC_SET_V4SI:
37808 case IX86_BUILTIN_VEC_SET_V8HI:
37809 case IX86_BUILTIN_VEC_SET_V4HI:
37810 case IX86_BUILTIN_VEC_SET_V16QI:
37811 return ix86_expand_vec_set_builtin (exp);
37813 case IX86_BUILTIN_INFQ:
37814 case IX86_BUILTIN_HUGE_VALQ:
37816 REAL_VALUE_TYPE inf;
37817 rtx tmp;
37819 real_inf (&inf);
37820 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
37822 tmp = validize_mem (force_const_mem (mode, tmp));
37824 if (target == 0)
37825 target = gen_reg_rtx (mode);
37827 emit_move_insn (target, tmp);
37828 return target;
37831 case IX86_BUILTIN_RDPMC:
37832 case IX86_BUILTIN_RDTSC:
37833 case IX86_BUILTIN_RDTSCP:
37835 op0 = gen_reg_rtx (DImode);
37836 op1 = gen_reg_rtx (DImode);
37838 if (fcode == IX86_BUILTIN_RDPMC)
37840 arg0 = CALL_EXPR_ARG (exp, 0);
37841 op2 = expand_normal (arg0);
37842 if (!register_operand (op2, SImode))
37843 op2 = copy_to_mode_reg (SImode, op2);
37845 insn = (TARGET_64BIT
37846 ? gen_rdpmc_rex64 (op0, op1, op2)
37847 : gen_rdpmc (op0, op2));
37848 emit_insn (insn);
37850 else if (fcode == IX86_BUILTIN_RDTSC)
37852 insn = (TARGET_64BIT
37853 ? gen_rdtsc_rex64 (op0, op1)
37854 : gen_rdtsc (op0));
37855 emit_insn (insn);
37857 else
37859 op2 = gen_reg_rtx (SImode);
37861 insn = (TARGET_64BIT
37862 ? gen_rdtscp_rex64 (op0, op1, op2)
37863 : gen_rdtscp (op0, op2));
37864 emit_insn (insn);
37866 arg0 = CALL_EXPR_ARG (exp, 0);
37867 op4 = expand_normal (arg0);
37868 if (!address_operand (op4, VOIDmode))
37870 op4 = convert_memory_address (Pmode, op4);
37871 op4 = copy_addr_to_reg (op4);
37873 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
37876 if (target == 0)
37878 /* mode is VOIDmode if __builtin_rd* has been called
37879 without lhs. */
37880 if (mode == VOIDmode)
37881 return target;
37882 target = gen_reg_rtx (mode);
37885 if (TARGET_64BIT)
37887 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
37888 op1, 1, OPTAB_DIRECT);
37889 op0 = expand_simple_binop (DImode, IOR, op0, op1,
37890 op0, 1, OPTAB_DIRECT);
37893 emit_move_insn (target, op0);
37894 return target;
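/* Illustrative note (an assumption about the ia32intrin.h mapping): __rdtsc
   is expected to reach the block above as IX86_BUILTIN_RDTSC; on 64-bit
   targets the EDX:EAX halves come back in two DImode registers and are
   recombined with the ASHIFT/IOR sequence just emitted.  */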
37896 case IX86_BUILTIN_FXSAVE:
37897 case IX86_BUILTIN_FXRSTOR:
37898 case IX86_BUILTIN_FXSAVE64:
37899 case IX86_BUILTIN_FXRSTOR64:
37900 case IX86_BUILTIN_FNSTENV:
37901 case IX86_BUILTIN_FLDENV:
37902 mode0 = BLKmode;
37903 switch (fcode)
37905 case IX86_BUILTIN_FXSAVE:
37906 icode = CODE_FOR_fxsave;
37907 break;
37908 case IX86_BUILTIN_FXRSTOR:
37909 icode = CODE_FOR_fxrstor;
37910 break;
37911 case IX86_BUILTIN_FXSAVE64:
37912 icode = CODE_FOR_fxsave64;
37913 break;
37914 case IX86_BUILTIN_FXRSTOR64:
37915 icode = CODE_FOR_fxrstor64;
37916 break;
37917 case IX86_BUILTIN_FNSTENV:
37918 icode = CODE_FOR_fnstenv;
37919 break;
37920 case IX86_BUILTIN_FLDENV:
37921 icode = CODE_FOR_fldenv;
37922 break;
37923 default:
37924 gcc_unreachable ();
37927 arg0 = CALL_EXPR_ARG (exp, 0);
37928 op0 = expand_normal (arg0);
37930 if (!address_operand (op0, VOIDmode))
37932 op0 = convert_memory_address (Pmode, op0);
37933 op0 = copy_addr_to_reg (op0);
37935 op0 = gen_rtx_MEM (mode0, op0);
37937 pat = GEN_FCN (icode) (op0);
37938 if (pat)
37939 emit_insn (pat);
37940 return 0;
37942 case IX86_BUILTIN_XSAVE:
37943 case IX86_BUILTIN_XRSTOR:
37944 case IX86_BUILTIN_XSAVE64:
37945 case IX86_BUILTIN_XRSTOR64:
37946 case IX86_BUILTIN_XSAVEOPT:
37947 case IX86_BUILTIN_XSAVEOPT64:
37948 case IX86_BUILTIN_XSAVES:
37949 case IX86_BUILTIN_XRSTORS:
37950 case IX86_BUILTIN_XSAVES64:
37951 case IX86_BUILTIN_XRSTORS64:
37952 case IX86_BUILTIN_XSAVEC:
37953 case IX86_BUILTIN_XSAVEC64:
37954 arg0 = CALL_EXPR_ARG (exp, 0);
37955 arg1 = CALL_EXPR_ARG (exp, 1);
37956 op0 = expand_normal (arg0);
37957 op1 = expand_normal (arg1);
37959 if (!address_operand (op0, VOIDmode))
37961 op0 = convert_memory_address (Pmode, op0);
37962 op0 = copy_addr_to_reg (op0);
37964 op0 = gen_rtx_MEM (BLKmode, op0);
37966 op1 = force_reg (DImode, op1);
37968 if (TARGET_64BIT)
37970 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
37971 NULL, 1, OPTAB_DIRECT);
37972 switch (fcode)
37974 case IX86_BUILTIN_XSAVE:
37975 icode = CODE_FOR_xsave_rex64;
37976 break;
37977 case IX86_BUILTIN_XRSTOR:
37978 icode = CODE_FOR_xrstor_rex64;
37979 break;
37980 case IX86_BUILTIN_XSAVE64:
37981 icode = CODE_FOR_xsave64;
37982 break;
37983 case IX86_BUILTIN_XRSTOR64:
37984 icode = CODE_FOR_xrstor64;
37985 break;
37986 case IX86_BUILTIN_XSAVEOPT:
37987 icode = CODE_FOR_xsaveopt_rex64;
37988 break;
37989 case IX86_BUILTIN_XSAVEOPT64:
37990 icode = CODE_FOR_xsaveopt64;
37991 break;
37992 case IX86_BUILTIN_XSAVES:
37993 icode = CODE_FOR_xsaves_rex64;
37994 break;
37995 case IX86_BUILTIN_XRSTORS:
37996 icode = CODE_FOR_xrstors_rex64;
37997 break;
37998 case IX86_BUILTIN_XSAVES64:
37999 icode = CODE_FOR_xsaves64;
38000 break;
38001 case IX86_BUILTIN_XRSTORS64:
38002 icode = CODE_FOR_xrstors64;
38003 break;
38004 case IX86_BUILTIN_XSAVEC:
38005 icode = CODE_FOR_xsavec_rex64;
38006 break;
38007 case IX86_BUILTIN_XSAVEC64:
38008 icode = CODE_FOR_xsavec64;
38009 break;
38010 default:
38011 gcc_unreachable ();
38014 op2 = gen_lowpart (SImode, op2);
38015 op1 = gen_lowpart (SImode, op1);
38016 pat = GEN_FCN (icode) (op0, op1, op2);
38018 else
38020 switch (fcode)
38022 case IX86_BUILTIN_XSAVE:
38023 icode = CODE_FOR_xsave;
38024 break;
38025 case IX86_BUILTIN_XRSTOR:
38026 icode = CODE_FOR_xrstor;
38027 break;
38028 case IX86_BUILTIN_XSAVEOPT:
38029 icode = CODE_FOR_xsaveopt;
38030 break;
38031 case IX86_BUILTIN_XSAVES:
38032 icode = CODE_FOR_xsaves;
38033 break;
38034 case IX86_BUILTIN_XRSTORS:
38035 icode = CODE_FOR_xrstors;
38036 break;
38037 case IX86_BUILTIN_XSAVEC:
38038 icode = CODE_FOR_xsavec;
38039 break;
38040 default:
38041 gcc_unreachable ();
38043 pat = GEN_FCN (icode) (op0, op1);
38046 if (pat)
38047 emit_insn (pat);
38048 return 0;
38050 case IX86_BUILTIN_LLWPCB:
38051 arg0 = CALL_EXPR_ARG (exp, 0);
38052 op0 = expand_normal (arg0);
38053 icode = CODE_FOR_lwp_llwpcb;
38054 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38055 op0 = ix86_zero_extend_to_Pmode (op0);
38056 emit_insn (gen_lwp_llwpcb (op0));
38057 return 0;
38059 case IX86_BUILTIN_SLWPCB:
38060 icode = CODE_FOR_lwp_slwpcb;
38061 if (!target
38062 || !insn_data[icode].operand[0].predicate (target, Pmode))
38063 target = gen_reg_rtx (Pmode);
38064 emit_insn (gen_lwp_slwpcb (target));
38065 return target;
38067 case IX86_BUILTIN_BEXTRI32:
38068 case IX86_BUILTIN_BEXTRI64:
38069 arg0 = CALL_EXPR_ARG (exp, 0);
38070 arg1 = CALL_EXPR_ARG (exp, 1);
38071 op0 = expand_normal (arg0);
38072 op1 = expand_normal (arg1);
38073 icode = (fcode == IX86_BUILTIN_BEXTRI32
38074 ? CODE_FOR_tbm_bextri_si
38075 : CODE_FOR_tbm_bextri_di);
38076 if (!CONST_INT_P (op1))
38078 error ("last argument must be an immediate");
38079 return const0_rtx;
38081 else
38083 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
38084 unsigned char lsb_index = INTVAL (op1) & 0xFF;
38085 op1 = GEN_INT (length);
38086 op2 = GEN_INT (lsb_index);
38087 pat = GEN_FCN (icode) (target, op0, op1, op2);
38088 if (pat)
38089 emit_insn (pat);
38090 return target;
38093 case IX86_BUILTIN_RDRAND16_STEP:
38094 icode = CODE_FOR_rdrandhi_1;
38095 mode0 = HImode;
38096 goto rdrand_step;
38098 case IX86_BUILTIN_RDRAND32_STEP:
38099 icode = CODE_FOR_rdrandsi_1;
38100 mode0 = SImode;
38101 goto rdrand_step;
38103 case IX86_BUILTIN_RDRAND64_STEP:
38104 icode = CODE_FOR_rdranddi_1;
38105 mode0 = DImode;
38107 rdrand_step:
38108 op0 = gen_reg_rtx (mode0);
38109 emit_insn (GEN_FCN (icode) (op0));
38111 arg0 = CALL_EXPR_ARG (exp, 0);
38112 op1 = expand_normal (arg0);
38113 if (!address_operand (op1, VOIDmode))
38115 op1 = convert_memory_address (Pmode, op1);
38116 op1 = copy_addr_to_reg (op1);
38118 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
38120 op1 = gen_reg_rtx (SImode);
38121 emit_move_insn (op1, CONST1_RTX (SImode));
38123 /* Emit SImode conditional move. */
38124 if (mode0 == HImode)
38126 op2 = gen_reg_rtx (SImode);
38127 emit_insn (gen_zero_extendhisi2 (op2, op0));
38129 else if (mode0 == SImode)
38130 op2 = op0;
38131 else
38132 op2 = gen_rtx_SUBREG (SImode, op0, 0);
38134 if (target == 0
38135 || !register_operand (target, SImode))
38136 target = gen_reg_rtx (SImode);
38138 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
38139 const0_rtx);
38140 emit_insn (gen_rtx_SET (VOIDmode, target,
38141 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
38142 return target;
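/* Illustrative note (an assumption about the immintrin.h mapping):

     unsigned int v;
     int ok = _rdrand32_step (&v);

   is expected to use the rdrand_step code above: the value is stored
   through the pointer argument and the return value is derived from the
   carry flag via the CCCmode conditional move (1 on success, 0 if no
   random number was available).  */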
38144 case IX86_BUILTIN_RDSEED16_STEP:
38145 icode = CODE_FOR_rdseedhi_1;
38146 mode0 = HImode;
38147 goto rdseed_step;
38149 case IX86_BUILTIN_RDSEED32_STEP:
38150 icode = CODE_FOR_rdseedsi_1;
38151 mode0 = SImode;
38152 goto rdseed_step;
38154 case IX86_BUILTIN_RDSEED64_STEP:
38155 icode = CODE_FOR_rdseeddi_1;
38156 mode0 = DImode;
38158 rdseed_step:
38159 op0 = gen_reg_rtx (mode0);
38160 emit_insn (GEN_FCN (icode) (op0));
38162 arg0 = CALL_EXPR_ARG (exp, 0);
38163 op1 = expand_normal (arg0);
38164 if (!address_operand (op1, VOIDmode))
38166 op1 = convert_memory_address (Pmode, op1);
38167 op1 = copy_addr_to_reg (op1);
38169 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
38171 op2 = gen_reg_rtx (QImode);
38173 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
38174 const0_rtx);
38175 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
38177 if (target == 0
38178 || !register_operand (target, SImode))
38179 target = gen_reg_rtx (SImode);
38181 emit_insn (gen_zero_extendqisi2 (target, op2));
38182 return target;
38184 case IX86_BUILTIN_ADDCARRYX32:
38185 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
38186 mode0 = SImode;
38187 goto addcarryx;
38189 case IX86_BUILTIN_ADDCARRYX64:
38190 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
38191 mode0 = DImode;
38193 addcarryx:
38194 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
38195 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
38196 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
38197 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
38199 op0 = gen_reg_rtx (QImode);
38201 /* Generate CF from input operand. */
38202 op1 = expand_normal (arg0);
38203 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
38204 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
38206 /* Gen ADCX instruction to compute X+Y+CF. */
38207 op2 = expand_normal (arg1);
38208 op3 = expand_normal (arg2);
38210 if (!REG_P (op2))
38211 op2 = copy_to_mode_reg (mode0, op2);
38212 if (!REG_P (op3))
38213 op3 = copy_to_mode_reg (mode0, op3);
38215 op0 = gen_reg_rtx (mode0);
38217 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
38218 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
38219 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
38221 /* Store the result. */
38222 op4 = expand_normal (arg3);
38223 if (!address_operand (op4, VOIDmode))
38225 op4 = convert_memory_address (Pmode, op4);
38226 op4 = copy_addr_to_reg (op4);
38228 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
38230 /* Return current CF value. */
38231 if (target == 0)
38232 target = gen_reg_rtx (QImode);
38234 PUT_MODE (pat, QImode);
38235 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
38236 return target;
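/* Illustrative note (an assumption about the adxintrin.h mapping):

     unsigned int sum;
     unsigned char c_out = _addcarryx_u32 (c_in, x, y, &sum);

   is expected to expand through the addcarryx code above: c_in is turned
   back into CF with an addqi3_cc against -1, the add-with-carry (ADCX when
   TARGET_ADX, otherwise a plain add-with-carry pattern) produces the sum
   stored via the pointer, and the new CF is returned as the QImode
   result.  */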
38238 case IX86_BUILTIN_READ_FLAGS:
38239 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
38241 if (optimize
38242 || target == NULL_RTX
38243 || !nonimmediate_operand (target, word_mode)
38244 || GET_MODE (target) != word_mode)
38245 target = gen_reg_rtx (word_mode);
38247 emit_insn (gen_pop (target));
38248 return target;
38250 case IX86_BUILTIN_WRITE_FLAGS:
38252 arg0 = CALL_EXPR_ARG (exp, 0);
38253 op0 = expand_normal (arg0);
38254 if (!general_no_elim_operand (op0, word_mode))
38255 op0 = copy_to_mode_reg (word_mode, op0);
38257 emit_insn (gen_push (op0));
38258 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
38259 return 0;
38261 case IX86_BUILTIN_KORTESTC16:
38262 icode = CODE_FOR_kortestchi;
38263 mode0 = HImode;
38264 mode1 = CCCmode;
38265 goto kortest;
38267 case IX86_BUILTIN_KORTESTZ16:
38268 icode = CODE_FOR_kortestzhi;
38269 mode0 = HImode;
38270 mode1 = CCZmode;
38272 kortest:
38273 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
38274 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
38275 op0 = expand_normal (arg0);
38276 op1 = expand_normal (arg1);
38278 op0 = copy_to_reg (op0);
38279 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
38280 op1 = copy_to_reg (op1);
38281 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
38283 target = gen_reg_rtx (QImode);
38284 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
38286 /* Emit kortest. */
38287 emit_insn (GEN_FCN (icode) (op0, op1));
38288 /* And use setcc to return result from flags. */
38289 ix86_expand_setcc (target, EQ,
38290 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
38291 return target;
38293 case IX86_BUILTIN_GATHERSIV2DF:
38294 icode = CODE_FOR_avx2_gathersiv2df;
38295 goto gather_gen;
38296 case IX86_BUILTIN_GATHERSIV4DF:
38297 icode = CODE_FOR_avx2_gathersiv4df;
38298 goto gather_gen;
38299 case IX86_BUILTIN_GATHERDIV2DF:
38300 icode = CODE_FOR_avx2_gatherdiv2df;
38301 goto gather_gen;
38302 case IX86_BUILTIN_GATHERDIV4DF:
38303 icode = CODE_FOR_avx2_gatherdiv4df;
38304 goto gather_gen;
38305 case IX86_BUILTIN_GATHERSIV4SF:
38306 icode = CODE_FOR_avx2_gathersiv4sf;
38307 goto gather_gen;
38308 case IX86_BUILTIN_GATHERSIV8SF:
38309 icode = CODE_FOR_avx2_gathersiv8sf;
38310 goto gather_gen;
38311 case IX86_BUILTIN_GATHERDIV4SF:
38312 icode = CODE_FOR_avx2_gatherdiv4sf;
38313 goto gather_gen;
38314 case IX86_BUILTIN_GATHERDIV8SF:
38315 icode = CODE_FOR_avx2_gatherdiv8sf;
38316 goto gather_gen;
38317 case IX86_BUILTIN_GATHERSIV2DI:
38318 icode = CODE_FOR_avx2_gathersiv2di;
38319 goto gather_gen;
38320 case IX86_BUILTIN_GATHERSIV4DI:
38321 icode = CODE_FOR_avx2_gathersiv4di;
38322 goto gather_gen;
38323 case IX86_BUILTIN_GATHERDIV2DI:
38324 icode = CODE_FOR_avx2_gatherdiv2di;
38325 goto gather_gen;
38326 case IX86_BUILTIN_GATHERDIV4DI:
38327 icode = CODE_FOR_avx2_gatherdiv4di;
38328 goto gather_gen;
38329 case IX86_BUILTIN_GATHERSIV4SI:
38330 icode = CODE_FOR_avx2_gathersiv4si;
38331 goto gather_gen;
38332 case IX86_BUILTIN_GATHERSIV8SI:
38333 icode = CODE_FOR_avx2_gathersiv8si;
38334 goto gather_gen;
38335 case IX86_BUILTIN_GATHERDIV4SI:
38336 icode = CODE_FOR_avx2_gatherdiv4si;
38337 goto gather_gen;
38338 case IX86_BUILTIN_GATHERDIV8SI:
38339 icode = CODE_FOR_avx2_gatherdiv8si;
38340 goto gather_gen;
38341 case IX86_BUILTIN_GATHERALTSIV4DF:
38342 icode = CODE_FOR_avx2_gathersiv4df;
38343 goto gather_gen;
38344 case IX86_BUILTIN_GATHERALTDIV8SF:
38345 icode = CODE_FOR_avx2_gatherdiv8sf;
38346 goto gather_gen;
38347 case IX86_BUILTIN_GATHERALTSIV4DI:
38348 icode = CODE_FOR_avx2_gathersiv4di;
38349 goto gather_gen;
38350 case IX86_BUILTIN_GATHERALTDIV8SI:
38351 icode = CODE_FOR_avx2_gatherdiv8si;
38352 goto gather_gen;
38353 case IX86_BUILTIN_GATHER3SIV16SF:
38354 icode = CODE_FOR_avx512f_gathersiv16sf;
38355 goto gather_gen;
38356 case IX86_BUILTIN_GATHER3SIV8DF:
38357 icode = CODE_FOR_avx512f_gathersiv8df;
38358 goto gather_gen;
38359 case IX86_BUILTIN_GATHER3DIV16SF:
38360 icode = CODE_FOR_avx512f_gatherdiv16sf;
38361 goto gather_gen;
38362 case IX86_BUILTIN_GATHER3DIV8DF:
38363 icode = CODE_FOR_avx512f_gatherdiv8df;
38364 goto gather_gen;
38365 case IX86_BUILTIN_GATHER3SIV16SI:
38366 icode = CODE_FOR_avx512f_gathersiv16si;
38367 goto gather_gen;
38368 case IX86_BUILTIN_GATHER3SIV8DI:
38369 icode = CODE_FOR_avx512f_gathersiv8di;
38370 goto gather_gen;
38371 case IX86_BUILTIN_GATHER3DIV16SI:
38372 icode = CODE_FOR_avx512f_gatherdiv16si;
38373 goto gather_gen;
38374 case IX86_BUILTIN_GATHER3DIV8DI:
38375 icode = CODE_FOR_avx512f_gatherdiv8di;
38376 goto gather_gen;
38377 case IX86_BUILTIN_GATHER3ALTSIV8DF:
38378 icode = CODE_FOR_avx512f_gathersiv8df;
38379 goto gather_gen;
38380 case IX86_BUILTIN_GATHER3ALTDIV16SF:
38381 icode = CODE_FOR_avx512f_gatherdiv16sf;
38382 goto gather_gen;
38383 case IX86_BUILTIN_GATHER3ALTSIV8DI:
38384 icode = CODE_FOR_avx512f_gathersiv8di;
38385 goto gather_gen;
38386 case IX86_BUILTIN_GATHER3ALTDIV16SI:
38387 icode = CODE_FOR_avx512f_gatherdiv16si;
38388 goto gather_gen;
38389 case IX86_BUILTIN_GATHER3SIV2DF:
38390 icode = CODE_FOR_avx512vl_gathersiv2df;
38391 goto gather_gen;
38392 case IX86_BUILTIN_GATHER3SIV4DF:
38393 icode = CODE_FOR_avx512vl_gathersiv4df;
38394 goto gather_gen;
38395 case IX86_BUILTIN_GATHER3DIV2DF:
38396 icode = CODE_FOR_avx512vl_gatherdiv2df;
38397 goto gather_gen;
38398 case IX86_BUILTIN_GATHER3DIV4DF:
38399 icode = CODE_FOR_avx512vl_gatherdiv4df;
38400 goto gather_gen;
38401 case IX86_BUILTIN_GATHER3SIV4SF:
38402 icode = CODE_FOR_avx512vl_gathersiv4sf;
38403 goto gather_gen;
38404 case IX86_BUILTIN_GATHER3SIV8SF:
38405 icode = CODE_FOR_avx512vl_gathersiv8sf;
38406 goto gather_gen;
38407 case IX86_BUILTIN_GATHER3DIV4SF:
38408 icode = CODE_FOR_avx512vl_gatherdiv4sf;
38409 goto gather_gen;
38410 case IX86_BUILTIN_GATHER3DIV8SF:
38411 icode = CODE_FOR_avx512vl_gatherdiv8sf;
38412 goto gather_gen;
38413 case IX86_BUILTIN_GATHER3SIV2DI:
38414 icode = CODE_FOR_avx512vl_gathersiv2di;
38415 goto gather_gen;
38416 case IX86_BUILTIN_GATHER3SIV4DI:
38417 icode = CODE_FOR_avx512vl_gathersiv4di;
38418 goto gather_gen;
38419 case IX86_BUILTIN_GATHER3DIV2DI:
38420 icode = CODE_FOR_avx512vl_gatherdiv2di;
38421 goto gather_gen;
38422 case IX86_BUILTIN_GATHER3DIV4DI:
38423 icode = CODE_FOR_avx512vl_gatherdiv4di;
38424 goto gather_gen;
38425 case IX86_BUILTIN_GATHER3SIV4SI:
38426 icode = CODE_FOR_avx512vl_gathersiv4si;
38427 goto gather_gen;
38428 case IX86_BUILTIN_GATHER3SIV8SI:
38429 icode = CODE_FOR_avx512vl_gathersiv8si;
38430 goto gather_gen;
38431 case IX86_BUILTIN_GATHER3DIV4SI:
38432 icode = CODE_FOR_avx512vl_gatherdiv4si;
38433 goto gather_gen;
38434 case IX86_BUILTIN_GATHER3DIV8SI:
38435 icode = CODE_FOR_avx512vl_gatherdiv8si;
38436 goto gather_gen;
38437 case IX86_BUILTIN_GATHER3ALTSIV4DF:
38438 icode = CODE_FOR_avx512vl_gathersiv4df;
38439 goto gather_gen;
38440 case IX86_BUILTIN_GATHER3ALTDIV8SF:
38441 icode = CODE_FOR_avx512vl_gatherdiv8sf;
38442 goto gather_gen;
38443 case IX86_BUILTIN_GATHER3ALTSIV4DI:
38444 icode = CODE_FOR_avx512vl_gathersiv4di;
38445 goto gather_gen;
38446 case IX86_BUILTIN_GATHER3ALTDIV8SI:
38447 icode = CODE_FOR_avx512vl_gatherdiv8si;
38448 goto gather_gen;
38449 case IX86_BUILTIN_SCATTERSIV16SF:
38450 icode = CODE_FOR_avx512f_scattersiv16sf;
38451 goto scatter_gen;
38452 case IX86_BUILTIN_SCATTERSIV8DF:
38453 icode = CODE_FOR_avx512f_scattersiv8df;
38454 goto scatter_gen;
38455 case IX86_BUILTIN_SCATTERDIV16SF:
38456 icode = CODE_FOR_avx512f_scatterdiv16sf;
38457 goto scatter_gen;
38458 case IX86_BUILTIN_SCATTERDIV8DF:
38459 icode = CODE_FOR_avx512f_scatterdiv8df;
38460 goto scatter_gen;
38461 case IX86_BUILTIN_SCATTERSIV16SI:
38462 icode = CODE_FOR_avx512f_scattersiv16si;
38463 goto scatter_gen;
38464 case IX86_BUILTIN_SCATTERSIV8DI:
38465 icode = CODE_FOR_avx512f_scattersiv8di;
38466 goto scatter_gen;
38467 case IX86_BUILTIN_SCATTERDIV16SI:
38468 icode = CODE_FOR_avx512f_scatterdiv16si;
38469 goto scatter_gen;
38470 case IX86_BUILTIN_SCATTERDIV8DI:
38471 icode = CODE_FOR_avx512f_scatterdiv8di;
38472 goto scatter_gen;
38473 case IX86_BUILTIN_SCATTERSIV8SF:
38474 icode = CODE_FOR_avx512vl_scattersiv8sf;
38475 goto scatter_gen;
38476 case IX86_BUILTIN_SCATTERSIV4SF:
38477 icode = CODE_FOR_avx512vl_scattersiv4sf;
38478 goto scatter_gen;
38479 case IX86_BUILTIN_SCATTERSIV4DF:
38480 icode = CODE_FOR_avx512vl_scattersiv4df;
38481 goto scatter_gen;
38482 case IX86_BUILTIN_SCATTERSIV2DF:
38483 icode = CODE_FOR_avx512vl_scattersiv2df;
38484 goto scatter_gen;
38485 case IX86_BUILTIN_SCATTERDIV8SF:
38486 icode = CODE_FOR_avx512vl_scatterdiv8sf;
38487 goto scatter_gen;
38488 case IX86_BUILTIN_SCATTERDIV4SF:
38489 icode = CODE_FOR_avx512vl_scatterdiv4sf;
38490 goto scatter_gen;
38491 case IX86_BUILTIN_SCATTERDIV4DF:
38492 icode = CODE_FOR_avx512vl_scatterdiv4df;
38493 goto scatter_gen;
38494 case IX86_BUILTIN_SCATTERDIV2DF:
38495 icode = CODE_FOR_avx512vl_scatterdiv2df;
38496 goto scatter_gen;
38497 case IX86_BUILTIN_SCATTERSIV8SI:
38498 icode = CODE_FOR_avx512vl_scattersiv8si;
38499 goto scatter_gen;
38500 case IX86_BUILTIN_SCATTERSIV4SI:
38501 icode = CODE_FOR_avx512vl_scattersiv4si;
38502 goto scatter_gen;
38503 case IX86_BUILTIN_SCATTERSIV4DI:
38504 icode = CODE_FOR_avx512vl_scattersiv4di;
38505 goto scatter_gen;
38506 case IX86_BUILTIN_SCATTERSIV2DI:
38507 icode = CODE_FOR_avx512vl_scattersiv2di;
38508 goto scatter_gen;
38509 case IX86_BUILTIN_SCATTERDIV8SI:
38510 icode = CODE_FOR_avx512vl_scatterdiv8si;
38511 goto scatter_gen;
38512 case IX86_BUILTIN_SCATTERDIV4SI:
38513 icode = CODE_FOR_avx512vl_scatterdiv4si;
38514 goto scatter_gen;
38515 case IX86_BUILTIN_SCATTERDIV4DI:
38516 icode = CODE_FOR_avx512vl_scatterdiv4di;
38517 goto scatter_gen;
38518 case IX86_BUILTIN_SCATTERDIV2DI:
38519 icode = CODE_FOR_avx512vl_scatterdiv2di;
38520 goto scatter_gen;
38521 case IX86_BUILTIN_GATHERPFDPD:
38522 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
38523 goto vec_prefetch_gen;
38524 case IX86_BUILTIN_GATHERPFDPS:
38525 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
38526 goto vec_prefetch_gen;
38527 case IX86_BUILTIN_GATHERPFQPD:
38528 icode = CODE_FOR_avx512pf_gatherpfv8didf;
38529 goto vec_prefetch_gen;
38530 case IX86_BUILTIN_GATHERPFQPS:
38531 icode = CODE_FOR_avx512pf_gatherpfv8disf;
38532 goto vec_prefetch_gen;
38533 case IX86_BUILTIN_SCATTERPFDPD:
38534 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
38535 goto vec_prefetch_gen;
38536 case IX86_BUILTIN_SCATTERPFDPS:
38537 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
38538 goto vec_prefetch_gen;
38539 case IX86_BUILTIN_SCATTERPFQPD:
38540 icode = CODE_FOR_avx512pf_scatterpfv8didf;
38541 goto vec_prefetch_gen;
38542 case IX86_BUILTIN_SCATTERPFQPS:
38543 icode = CODE_FOR_avx512pf_scatterpfv8disf;
38544 goto vec_prefetch_gen;
38546 gather_gen:
38547 rtx half;
38548 rtx (*gen) (rtx, rtx);
38550 arg0 = CALL_EXPR_ARG (exp, 0);
38551 arg1 = CALL_EXPR_ARG (exp, 1);
38552 arg2 = CALL_EXPR_ARG (exp, 2);
38553 arg3 = CALL_EXPR_ARG (exp, 3);
38554 arg4 = CALL_EXPR_ARG (exp, 4);
38555 op0 = expand_normal (arg0);
38556 op1 = expand_normal (arg1);
38557 op2 = expand_normal (arg2);
38558 op3 = expand_normal (arg3);
38559 op4 = expand_normal (arg4);
38560 /* Note the arg order is different from the operand order. */
38561 mode0 = insn_data[icode].operand[1].mode;
38562 mode2 = insn_data[icode].operand[3].mode;
38563 mode3 = insn_data[icode].operand[4].mode;
38564 mode4 = insn_data[icode].operand[5].mode;
38566 if (target == NULL_RTX
38567 || GET_MODE (target) != insn_data[icode].operand[0].mode
38568 || !insn_data[icode].operand[0].predicate (target,
38569 GET_MODE (target)))
38570 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
38571 else
38572 subtarget = target;
38574 switch (fcode)
38576 case IX86_BUILTIN_GATHER3ALTSIV8DF:
38577 case IX86_BUILTIN_GATHER3ALTSIV8DI:
38578 half = gen_reg_rtx (V8SImode);
38579 if (!nonimmediate_operand (op2, V16SImode))
38580 op2 = copy_to_mode_reg (V16SImode, op2);
38581 emit_insn (gen_vec_extract_lo_v16si (half, op2));
38582 op2 = half;
38583 break;
38584 case IX86_BUILTIN_GATHER3ALTSIV4DF:
38585 case IX86_BUILTIN_GATHER3ALTSIV4DI:
38586 case IX86_BUILTIN_GATHERALTSIV4DF:
38587 case IX86_BUILTIN_GATHERALTSIV4DI:
38588 half = gen_reg_rtx (V4SImode);
38589 if (!nonimmediate_operand (op2, V8SImode))
38590 op2 = copy_to_mode_reg (V8SImode, op2);
38591 emit_insn (gen_vec_extract_lo_v8si (half, op2));
38592 op2 = half;
38593 break;
38594 case IX86_BUILTIN_GATHER3ALTDIV16SF:
38595 case IX86_BUILTIN_GATHER3ALTDIV16SI:
38596 half = gen_reg_rtx (mode0);
38597 if (mode0 == V8SFmode)
38598 gen = gen_vec_extract_lo_v16sf;
38599 else
38600 gen = gen_vec_extract_lo_v16si;
38601 if (!nonimmediate_operand (op0, GET_MODE (op0)))
38602 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
38603 emit_insn (gen (half, op0));
38604 op0 = half;
38605 if (GET_MODE (op3) != VOIDmode)
38607 if (!nonimmediate_operand (op3, GET_MODE (op3)))
38608 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38609 emit_insn (gen (half, op3));
38610 op3 = half;
38612 break;
38613 case IX86_BUILTIN_GATHER3ALTDIV8SF:
38614 case IX86_BUILTIN_GATHER3ALTDIV8SI:
38615 case IX86_BUILTIN_GATHERALTDIV8SF:
38616 case IX86_BUILTIN_GATHERALTDIV8SI:
38617 half = gen_reg_rtx (mode0);
38618 if (mode0 == V4SFmode)
38619 gen = gen_vec_extract_lo_v8sf;
38620 else
38621 gen = gen_vec_extract_lo_v8si;
38622 if (!nonimmediate_operand (op0, GET_MODE (op0)))
38623 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
38624 emit_insn (gen (half, op0));
38625 op0 = half;
38626 if (GET_MODE (op3) != VOIDmode)
38628 if (!nonimmediate_operand (op3, GET_MODE (op3)))
38629 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
38630 emit_insn (gen (half, op3));
38631 op3 = half;
38633 break;
38634 default:
38635 break;
38638 /* Force the memory operand to use only a base register here. But
38639 we don't want to do this for the memory operands of other
38640 builtin functions. */
38641 op1 = ix86_zero_extend_to_Pmode (op1);
38643 if (!insn_data[icode].operand[1].predicate (op0, mode0))
38644 op0 = copy_to_mode_reg (mode0, op0);
38645 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
38646 op1 = copy_to_mode_reg (Pmode, op1);
38647 if (!insn_data[icode].operand[3].predicate (op2, mode2))
38648 op2 = copy_to_mode_reg (mode2, op2);
38649 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
38651 if (!insn_data[icode].operand[4].predicate (op3, mode3))
38652 op3 = copy_to_mode_reg (mode3, op3);
38654 else
38656 op3 = copy_to_reg (op3);
38657 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
38659 if (!insn_data[icode].operand[5].predicate (op4, mode4))
38661 error ("the last argument must be scale 1, 2, 4, 8");
38662 return const0_rtx;
38665 /* Optimize. If mask is known to have all high bits set,
38666 replace op0 with pc_rtx to signal that the instruction
38667 overwrites the whole destination and doesn't use its
38668 previous contents. */
38669 if (optimize)
38671 if (TREE_CODE (arg3) == INTEGER_CST)
38673 if (integer_all_onesp (arg3))
38674 op0 = pc_rtx;
38676 else if (TREE_CODE (arg3) == VECTOR_CST)
38678 unsigned int negative = 0;
38679 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
38681 tree cst = VECTOR_CST_ELT (arg3, i);
38682 if (TREE_CODE (cst) == INTEGER_CST
38683 && tree_int_cst_sign_bit (cst))
38684 negative++;
38685 else if (TREE_CODE (cst) == REAL_CST
38686 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
38687 negative++;
38689 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
38690 op0 = pc_rtx;
38692 else if (TREE_CODE (arg3) == SSA_NAME
38693 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
38695 /* Recognize also when mask is like:
38696 __v2df src = _mm_setzero_pd ();
38697 __v2df mask = _mm_cmpeq_pd (src, src);
38699 __v8sf src = _mm256_setzero_ps ();
38700 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
38701 as that is a cheaper way to load all ones into
38702 a register than having to load a constant from
38703 memory. */
38704 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
38705 if (is_gimple_call (def_stmt))
38707 tree fndecl = gimple_call_fndecl (def_stmt);
38708 if (fndecl
38709 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
38710 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
38712 case IX86_BUILTIN_CMPPD:
38713 case IX86_BUILTIN_CMPPS:
38714 case IX86_BUILTIN_CMPPD256:
38715 case IX86_BUILTIN_CMPPS256:
38716 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
38717 break;
38718 /* FALLTHRU */
38719 case IX86_BUILTIN_CMPEQPD:
38720 case IX86_BUILTIN_CMPEQPS:
38721 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
38722 && initializer_zerop (gimple_call_arg (def_stmt,
38723 1)))
38724 op0 = pc_rtx;
38725 break;
38726 default:
38727 break;
38733 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
38734 if (! pat)
38735 return const0_rtx;
38736 emit_insn (pat);
38738 switch (fcode)
38740 case IX86_BUILTIN_GATHER3DIV16SF:
38741 if (target == NULL_RTX)
38742 target = gen_reg_rtx (V8SFmode);
38743 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
38744 break;
38745 case IX86_BUILTIN_GATHER3DIV16SI:
38746 if (target == NULL_RTX)
38747 target = gen_reg_rtx (V8SImode);
38748 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
38749 break;
38750 case IX86_BUILTIN_GATHER3DIV8SF:
38751 case IX86_BUILTIN_GATHERDIV8SF:
38752 if (target == NULL_RTX)
38753 target = gen_reg_rtx (V4SFmode);
38754 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
38755 break;
38756 case IX86_BUILTIN_GATHER3DIV8SI:
38757 case IX86_BUILTIN_GATHERDIV8SI:
38758 if (target == NULL_RTX)
38759 target = gen_reg_rtx (V4SImode);
38760 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
38761 break;
38762 default:
38763 target = subtarget;
38764 break;
38766 return target;
38768 scatter_gen:
38769 arg0 = CALL_EXPR_ARG (exp, 0);
38770 arg1 = CALL_EXPR_ARG (exp, 1);
38771 arg2 = CALL_EXPR_ARG (exp, 2);
38772 arg3 = CALL_EXPR_ARG (exp, 3);
38773 arg4 = CALL_EXPR_ARG (exp, 4);
38774 op0 = expand_normal (arg0);
38775 op1 = expand_normal (arg1);
38776 op2 = expand_normal (arg2);
38777 op3 = expand_normal (arg3);
38778 op4 = expand_normal (arg4);
38779 mode1 = insn_data[icode].operand[1].mode;
38780 mode2 = insn_data[icode].operand[2].mode;
38781 mode3 = insn_data[icode].operand[3].mode;
38782 mode4 = insn_data[icode].operand[4].mode;
38784 /* Force the memory operand to use only a base register here. But
38785 we don't want to do this for the memory operands of other
38786 builtin functions. */
38787 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
38789 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
38790 op0 = copy_to_mode_reg (Pmode, op0);
38792 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
38794 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38795 op1 = copy_to_mode_reg (mode1, op1);
38797 else
38799 op1 = copy_to_reg (op1);
38800 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
38803 if (!insn_data[icode].operand[2].predicate (op2, mode2))
38804 op2 = copy_to_mode_reg (mode2, op2);
38806 if (!insn_data[icode].operand[3].predicate (op3, mode3))
38807 op3 = copy_to_mode_reg (mode3, op3);
38809 if (!insn_data[icode].operand[4].predicate (op4, mode4))
38811 error ("the last argument must be scale 1, 2, 4, 8");
38812 return const0_rtx;
38815 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
38816 if (! pat)
38817 return const0_rtx;
38819 emit_insn (pat);
38820 return 0;
38822 vec_prefetch_gen:
38823 arg0 = CALL_EXPR_ARG (exp, 0);
38824 arg1 = CALL_EXPR_ARG (exp, 1);
38825 arg2 = CALL_EXPR_ARG (exp, 2);
38826 arg3 = CALL_EXPR_ARG (exp, 3);
38827 arg4 = CALL_EXPR_ARG (exp, 4);
38828 op0 = expand_normal (arg0);
38829 op1 = expand_normal (arg1);
38830 op2 = expand_normal (arg2);
38831 op3 = expand_normal (arg3);
38832 op4 = expand_normal (arg4);
38833 mode0 = insn_data[icode].operand[0].mode;
38834 mode1 = insn_data[icode].operand[1].mode;
38835 mode3 = insn_data[icode].operand[3].mode;
38836 mode4 = insn_data[icode].operand[4].mode;
38838 if (GET_MODE (op0) == mode0
38839 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
38841 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38842 op0 = copy_to_mode_reg (mode0, op0);
38844 else if (op0 != constm1_rtx)
38846 op0 = copy_to_reg (op0);
38847 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
38850 if (!insn_data[icode].operand[1].predicate (op1, mode1))
38851 op1 = copy_to_mode_reg (mode1, op1);
38853 /* Force the memory operand to use only a base register here. But
38854 we don't want to do this for the memory operands of other
38855 builtin functions. */
38856 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
38858 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
38859 op2 = copy_to_mode_reg (Pmode, op2);
38861 if (!insn_data[icode].operand[3].predicate (op3, mode3))
38863 error ("the fourth argument must be scale 1, 2, 4, 8");
38864 return const0_rtx;
38867 if (!insn_data[icode].operand[4].predicate (op4, mode4))
38869 error ("incorrect hint operand");
38870 return const0_rtx;
38873 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
38874 if (! pat)
38875 return const0_rtx;
38877 emit_insn (pat);
38879 return 0;
38881 case IX86_BUILTIN_XABORT:
38882 icode = CODE_FOR_xabort;
38883 arg0 = CALL_EXPR_ARG (exp, 0);
38884 op0 = expand_normal (arg0);
38885 mode0 = insn_data[icode].operand[0].mode;
38886 if (!insn_data[icode].operand[0].predicate (op0, mode0))
38888 error ("the xabort's argument must be an 8-bit immediate");
38889 return const0_rtx;
38891 emit_insn (gen_xabort (op0));
38892 return 0;
38894 default:
38895 break;
38898 for (i = 0, d = bdesc_special_args;
38899 i < ARRAY_SIZE (bdesc_special_args);
38900 i++, d++)
38901 if (d->code == fcode)
38902 return ix86_expand_special_args_builtin (d, exp, target);
38904 for (i = 0, d = bdesc_args;
38905 i < ARRAY_SIZE (bdesc_args);
38906 i++, d++)
38907 if (d->code == fcode)
38908 switch (fcode)
38910 case IX86_BUILTIN_FABSQ:
38911 case IX86_BUILTIN_COPYSIGNQ:
38912 if (!TARGET_SSE)
38913 /* Emit a normal call if SSE isn't available. */
38914 return expand_call (exp, target, ignore);
38915 default:
38916 return ix86_expand_args_builtin (d, exp, target);
38919 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
38920 if (d->code == fcode)
38921 return ix86_expand_sse_comi (d, exp, target);
38923 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
38924 if (d->code == fcode)
38925 return ix86_expand_round_builtin (d, exp, target);
38927 for (i = 0, d = bdesc_pcmpestr;
38928 i < ARRAY_SIZE (bdesc_pcmpestr);
38929 i++, d++)
38930 if (d->code == fcode)
38931 return ix86_expand_sse_pcmpestr (d, exp, target);
38933 for (i = 0, d = bdesc_pcmpistr;
38934 i < ARRAY_SIZE (bdesc_pcmpistr);
38935 i++, d++)
38936 if (d->code == fcode)
38937 return ix86_expand_sse_pcmpistr (d, exp, target);
38939 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
38940 if (d->code == fcode)
38941 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
38942 (enum ix86_builtin_func_type)
38943 d->flag, d->comparison);
38945 gcc_unreachable ();
38948 /* This returns the target-specific builtin with code CODE if
38949 current_function_decl has visibility on this builtin, which is checked
38950 using isa flags. Returns NULL_TREE otherwise. */
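/* Usage sketch (illustrative, not from the original source): inside a
   function compiled with __attribute__((target ("avx2"))) the
   OPTION_MASK_ISA_AVX2 bit is present in opts->x_ix86_isa_flags, so
   ix86_get_builtin (IX86_BUILTIN_GATHERSIV4DF) returns the decl even
   if -mavx2 was not given on the command line; without either, it
   returns NULL_TREE.  */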
38952 static tree ix86_get_builtin (enum ix86_builtins code)
38954 struct cl_target_option *opts;
38955 tree target_tree = NULL_TREE;
38957 /* Determine the isa flags of current_function_decl. */
38959 if (current_function_decl)
38960 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
38962 if (target_tree == NULL)
38963 target_tree = target_option_default_node;
38965 opts = TREE_TARGET_OPTION (target_tree);
38967 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
38968 return ix86_builtin_decl (code, true);
38969 else
38970 return NULL_TREE;
38973 /* Returns a function decl for a vectorized version of the builtin function
38974 FNDECL, with result vector type TYPE_OUT and argument vector type TYPE_IN,
38975 or NULL_TREE if it is not available. */
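/* Example (illustrative): when the vectorizer asks for sqrt () with
   V4DF result and argument types (out_mode == DFmode, out_n == 4),
   the switch below returns the decl for IX86_BUILTIN_SQRTPD256, i.e.
   one 256-bit vsqrtpd replaces four scalar calls.  */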
38977 static tree
38978 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
38979 tree type_in)
38981 enum machine_mode in_mode, out_mode;
38982 int in_n, out_n;
38983 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
38985 if (TREE_CODE (type_out) != VECTOR_TYPE
38986 || TREE_CODE (type_in) != VECTOR_TYPE
38987 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
38988 return NULL_TREE;
38990 out_mode = TYPE_MODE (TREE_TYPE (type_out));
38991 out_n = TYPE_VECTOR_SUBPARTS (type_out);
38992 in_mode = TYPE_MODE (TREE_TYPE (type_in));
38993 in_n = TYPE_VECTOR_SUBPARTS (type_in);
38995 switch (fn)
38997 case BUILT_IN_SQRT:
38998 if (out_mode == DFmode && in_mode == DFmode)
39000 if (out_n == 2 && in_n == 2)
39001 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
39002 else if (out_n == 4 && in_n == 4)
39003 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
39004 else if (out_n == 8 && in_n == 8)
39005 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
39007 break;
39009 case BUILT_IN_EXP2F:
39010 if (out_mode == SFmode && in_mode == SFmode)
39012 if (out_n == 16 && in_n == 16)
39013 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
39015 break;
39017 case BUILT_IN_SQRTF:
39018 if (out_mode == SFmode && in_mode == SFmode)
39020 if (out_n == 4 && in_n == 4)
39021 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
39022 else if (out_n == 8 && in_n == 8)
39023 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
39024 else if (out_n == 16 && in_n == 16)
39025 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
39027 break;
39029 case BUILT_IN_IFLOOR:
39030 case BUILT_IN_LFLOOR:
39031 case BUILT_IN_LLFLOOR:
39032 /* The round insn does not trap on denormals. */
39033 if (flag_trapping_math || !TARGET_ROUND)
39034 break;
39036 if (out_mode == SImode && in_mode == DFmode)
39038 if (out_n == 4 && in_n == 2)
39039 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
39040 else if (out_n == 8 && in_n == 4)
39041 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
39042 else if (out_n == 16 && in_n == 8)
39043 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
39045 break;
39047 case BUILT_IN_IFLOORF:
39048 case BUILT_IN_LFLOORF:
39049 case BUILT_IN_LLFLOORF:
39050 /* The round insn does not trap on denormals. */
39051 if (flag_trapping_math || !TARGET_ROUND)
39052 break;
39054 if (out_mode == SImode && in_mode == SFmode)
39056 if (out_n == 4 && in_n == 4)
39057 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
39058 else if (out_n == 8 && in_n == 8)
39059 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
39061 break;
39063 case BUILT_IN_ICEIL:
39064 case BUILT_IN_LCEIL:
39065 case BUILT_IN_LLCEIL:
39066 /* The round insn does not trap on denormals. */
39067 if (flag_trapping_math || !TARGET_ROUND)
39068 break;
39070 if (out_mode == SImode && in_mode == DFmode)
39072 if (out_n == 4 && in_n == 2)
39073 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
39074 else if (out_n == 8 && in_n == 4)
39075 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
39076 else if (out_n == 16 && in_n == 8)
39077 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
39079 break;
39081 case BUILT_IN_ICEILF:
39082 case BUILT_IN_LCEILF:
39083 case BUILT_IN_LLCEILF:
39084 /* The round insn does not trap on denormals. */
39085 if (flag_trapping_math || !TARGET_ROUND)
39086 break;
39088 if (out_mode == SImode && in_mode == SFmode)
39090 if (out_n == 4 && in_n == 4)
39091 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
39092 else if (out_n == 8 && in_n == 8)
39093 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
39095 break;
39097 case BUILT_IN_IRINT:
39098 case BUILT_IN_LRINT:
39099 case BUILT_IN_LLRINT:
39100 if (out_mode == SImode && in_mode == DFmode)
39102 if (out_n == 4 && in_n == 2)
39103 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
39104 else if (out_n == 8 && in_n == 4)
39105 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
39107 break;
39109 case BUILT_IN_IRINTF:
39110 case BUILT_IN_LRINTF:
39111 case BUILT_IN_LLRINTF:
39112 if (out_mode == SImode && in_mode == SFmode)
39114 if (out_n == 4 && in_n == 4)
39115 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
39116 else if (out_n == 8 && in_n == 8)
39117 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
39119 break;
39121 case BUILT_IN_IROUND:
39122 case BUILT_IN_LROUND:
39123 case BUILT_IN_LLROUND:
39124 /* The round insn does not trap on denormals. */
39125 if (flag_trapping_math || !TARGET_ROUND)
39126 break;
39128 if (out_mode == SImode && in_mode == DFmode)
39130 if (out_n == 4 && in_n == 2)
39131 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
39132 else if (out_n == 8 && in_n == 4)
39133 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
39134 else if (out_n == 16 && in_n == 8)
39135 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
39137 break;
39139 case BUILT_IN_IROUNDF:
39140 case BUILT_IN_LROUNDF:
39141 case BUILT_IN_LLROUNDF:
39142 /* The round insn does not trap on denormals. */
39143 if (flag_trapping_math || !TARGET_ROUND)
39144 break;
39146 if (out_mode == SImode && in_mode == SFmode)
39148 if (out_n == 4 && in_n == 4)
39149 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
39150 else if (out_n == 8 && in_n == 8)
39151 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
39153 break;
39155 case BUILT_IN_COPYSIGN:
39156 if (out_mode == DFmode && in_mode == DFmode)
39158 if (out_n == 2 && in_n == 2)
39159 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
39160 else if (out_n == 4 && in_n == 4)
39161 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
39162 else if (out_n == 8 && in_n == 8)
39163 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
39165 break;
39167 case BUILT_IN_COPYSIGNF:
39168 if (out_mode == SFmode && in_mode == SFmode)
39170 if (out_n == 4 && in_n == 4)
39171 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
39172 else if (out_n == 8 && in_n == 8)
39173 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
39174 else if (out_n == 16 && in_n == 16)
39175 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
39177 break;
39179 case BUILT_IN_FLOOR:
39180 /* The round insn does not trap on denormals. */
39181 if (flag_trapping_math || !TARGET_ROUND)
39182 break;
39184 if (out_mode == DFmode && in_mode == DFmode)
39186 if (out_n == 2 && in_n == 2)
39187 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
39188 else if (out_n == 4 && in_n == 4)
39189 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
39191 break;
39193 case BUILT_IN_FLOORF:
39194 /* The round insn does not trap on denormals. */
39195 if (flag_trapping_math || !TARGET_ROUND)
39196 break;
39198 if (out_mode == SFmode && in_mode == SFmode)
39200 if (out_n == 4 && in_n == 4)
39201 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
39202 else if (out_n == 8 && in_n == 8)
39203 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
39205 break;
39207 case BUILT_IN_CEIL:
39208 /* The round insn does not trap on denormals. */
39209 if (flag_trapping_math || !TARGET_ROUND)
39210 break;
39212 if (out_mode == DFmode && in_mode == DFmode)
39214 if (out_n == 2 && in_n == 2)
39215 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
39216 else if (out_n == 4 && in_n == 4)
39217 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
39219 break;
39221 case BUILT_IN_CEILF:
39222 /* The round insn does not trap on denormals. */
39223 if (flag_trapping_math || !TARGET_ROUND)
39224 break;
39226 if (out_mode == SFmode && in_mode == SFmode)
39228 if (out_n == 4 && in_n == 4)
39229 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
39230 else if (out_n == 8 && in_n == 8)
39231 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
39233 break;
39235 case BUILT_IN_TRUNC:
39236 /* The round insn does not trap on denormals. */
39237 if (flag_trapping_math || !TARGET_ROUND)
39238 break;
39240 if (out_mode == DFmode && in_mode == DFmode)
39242 if (out_n == 2 && in_n == 2)
39243 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
39244 else if (out_n == 4 && in_n == 4)
39245 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
39247 break;
39249 case BUILT_IN_TRUNCF:
39250 /* The round insn does not trap on denormals. */
39251 if (flag_trapping_math || !TARGET_ROUND)
39252 break;
39254 if (out_mode == SFmode && in_mode == SFmode)
39256 if (out_n == 4 && in_n == 4)
39257 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
39258 else if (out_n == 8 && in_n == 8)
39259 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
39261 break;
39263 case BUILT_IN_RINT:
39264 /* The round insn does not trap on denormals. */
39265 if (flag_trapping_math || !TARGET_ROUND)
39266 break;
39268 if (out_mode == DFmode && in_mode == DFmode)
39270 if (out_n == 2 && in_n == 2)
39271 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
39272 else if (out_n == 4 && in_n == 4)
39273 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
39275 break;
39277 case BUILT_IN_RINTF:
39278 /* The round insn does not trap on denormals. */
39279 if (flag_trapping_math || !TARGET_ROUND)
39280 break;
39282 if (out_mode == SFmode && in_mode == SFmode)
39284 if (out_n == 4 && in_n == 4)
39285 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
39286 else if (out_n == 8 && in_n == 8)
39287 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
39289 break;
39291 case BUILT_IN_ROUND:
39292 /* The round insn does not trap on denormals. */
39293 if (flag_trapping_math || !TARGET_ROUND)
39294 break;
39296 if (out_mode == DFmode && in_mode == DFmode)
39298 if (out_n == 2 && in_n == 2)
39299 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
39300 else if (out_n == 4 && in_n == 4)
39301 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
39303 break;
39305 case BUILT_IN_ROUNDF:
39306 /* The round insn does not trap on denormals. */
39307 if (flag_trapping_math || !TARGET_ROUND)
39308 break;
39310 if (out_mode == SFmode && in_mode == SFmode)
39312 if (out_n == 4 && in_n == 4)
39313 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
39314 else if (out_n == 8 && in_n == 8)
39315 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
39317 break;
39319 case BUILT_IN_FMA:
39320 if (out_mode == DFmode && in_mode == DFmode)
39322 if (out_n == 2 && in_n == 2)
39323 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
39324 if (out_n == 4 && in_n == 4)
39325 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
39327 break;
39329 case BUILT_IN_FMAF:
39330 if (out_mode == SFmode && in_mode == SFmode)
39332 if (out_n == 4 && in_n == 4)
39333 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
39334 if (out_n == 8 && in_n == 8)
39335 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
39337 break;
39339 default:
39340 break;
39343 /* Dispatch to a handler for a vectorization library. */
39344 if (ix86_veclib_handler)
39345 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
39346 type_in);
39348 return NULL_TREE;
39351 /* Handler for an SVML-style interface to
39352 a library with vectorized intrinsics. */
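/* Naming sketch derived from the code below (illustrative): the SVML
   entry point is built from the builtin's name, so BUILT_IN_SINF on a
   4-element SFmode vector becomes "vmlsSin4" and BUILT_IN_SIN on a
   2-element DFmode vector becomes "vmldSin2"; logf and log are special
   cased as "vmlsLn4" and "vmldLn2".  */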
39354 static tree
39355 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
39357 char name[20];
39358 tree fntype, new_fndecl, args;
39359 unsigned arity;
39360 const char *bname;
39361 enum machine_mode el_mode, in_mode;
39362 int n, in_n;
39364 /* The SVML is suitable for unsafe math only. */
39365 if (!flag_unsafe_math_optimizations)
39366 return NULL_TREE;
39368 el_mode = TYPE_MODE (TREE_TYPE (type_out));
39369 n = TYPE_VECTOR_SUBPARTS (type_out);
39370 in_mode = TYPE_MODE (TREE_TYPE (type_in));
39371 in_n = TYPE_VECTOR_SUBPARTS (type_in);
39372 if (el_mode != in_mode
39373 || n != in_n)
39374 return NULL_TREE;
39376 switch (fn)
39378 case BUILT_IN_EXP:
39379 case BUILT_IN_LOG:
39380 case BUILT_IN_LOG10:
39381 case BUILT_IN_POW:
39382 case BUILT_IN_TANH:
39383 case BUILT_IN_TAN:
39384 case BUILT_IN_ATAN:
39385 case BUILT_IN_ATAN2:
39386 case BUILT_IN_ATANH:
39387 case BUILT_IN_CBRT:
39388 case BUILT_IN_SINH:
39389 case BUILT_IN_SIN:
39390 case BUILT_IN_ASINH:
39391 case BUILT_IN_ASIN:
39392 case BUILT_IN_COSH:
39393 case BUILT_IN_COS:
39394 case BUILT_IN_ACOSH:
39395 case BUILT_IN_ACOS:
39396 if (el_mode != DFmode || n != 2)
39397 return NULL_TREE;
39398 break;
39400 case BUILT_IN_EXPF:
39401 case BUILT_IN_LOGF:
39402 case BUILT_IN_LOG10F:
39403 case BUILT_IN_POWF:
39404 case BUILT_IN_TANHF:
39405 case BUILT_IN_TANF:
39406 case BUILT_IN_ATANF:
39407 case BUILT_IN_ATAN2F:
39408 case BUILT_IN_ATANHF:
39409 case BUILT_IN_CBRTF:
39410 case BUILT_IN_SINHF:
39411 case BUILT_IN_SINF:
39412 case BUILT_IN_ASINHF:
39413 case BUILT_IN_ASINF:
39414 case BUILT_IN_COSHF:
39415 case BUILT_IN_COSF:
39416 case BUILT_IN_ACOSHF:
39417 case BUILT_IN_ACOSF:
39418 if (el_mode != SFmode || n != 4)
39419 return NULL_TREE;
39420 break;
39422 default:
39423 return NULL_TREE;
39426 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
39428 if (fn == BUILT_IN_LOGF)
39429 strcpy (name, "vmlsLn4");
39430 else if (fn == BUILT_IN_LOG)
39431 strcpy (name, "vmldLn2");
39432 else if (n == 4)
39434 sprintf (name, "vmls%s", bname+10);
39435 name[strlen (name)-1] = '4';
39437 else
39438 sprintf (name, "vmld%s2", bname+10);
39440 /* Convert to uppercase. */
39441 name[4] &= ~0x20;
39443 arity = 0;
39444 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
39445 args;
39446 args = TREE_CHAIN (args))
39447 arity++;
39449 if (arity == 1)
39450 fntype = build_function_type_list (type_out, type_in, NULL);
39451 else
39452 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
39454 /* Build a function declaration for the vectorized function. */
39455 new_fndecl = build_decl (BUILTINS_LOCATION,
39456 FUNCTION_DECL, get_identifier (name), fntype);
39457 TREE_PUBLIC (new_fndecl) = 1;
39458 DECL_EXTERNAL (new_fndecl) = 1;
39459 DECL_IS_NOVOPS (new_fndecl) = 1;
39460 TREE_READONLY (new_fndecl) = 1;
39462 return new_fndecl;
39465 /* Handler for an ACML-style interface to
39466 a library with vectorized intrinsics. */
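/* Naming sketch derived from the code below (illustrative): the ACML
   entry point is formed by patching the "__vr.._" template, so
   BUILT_IN_SIN on a 2-element DFmode vector becomes "__vrd2_sin" and
   BUILT_IN_SINF on a 4-element SFmode vector becomes "__vrs4_sinf".  */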
39468 static tree
39469 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
39471 char name[20] = "__vr.._";
39472 tree fntype, new_fndecl, args;
39473 unsigned arity;
39474 const char *bname;
39475 enum machine_mode el_mode, in_mode;
39476 int n, in_n;
39478 /* The ACML is 64-bit only and suitable for unsafe math only, as
39479 it does not correctly support parts of IEEE with the required
39480 precision such as denormals. */
39481 if (!TARGET_64BIT
39482 || !flag_unsafe_math_optimizations)
39483 return NULL_TREE;
39485 el_mode = TYPE_MODE (TREE_TYPE (type_out));
39486 n = TYPE_VECTOR_SUBPARTS (type_out);
39487 in_mode = TYPE_MODE (TREE_TYPE (type_in));
39488 in_n = TYPE_VECTOR_SUBPARTS (type_in);
39489 if (el_mode != in_mode
39490 || n != in_n)
39491 return NULL_TREE;
39493 switch (fn)
39495 case BUILT_IN_SIN:
39496 case BUILT_IN_COS:
39497 case BUILT_IN_EXP:
39498 case BUILT_IN_LOG:
39499 case BUILT_IN_LOG2:
39500 case BUILT_IN_LOG10:
39501 name[4] = 'd';
39502 name[5] = '2';
39503 if (el_mode != DFmode
39504 || n != 2)
39505 return NULL_TREE;
39506 break;
39508 case BUILT_IN_SINF:
39509 case BUILT_IN_COSF:
39510 case BUILT_IN_EXPF:
39511 case BUILT_IN_POWF:
39512 case BUILT_IN_LOGF:
39513 case BUILT_IN_LOG2F:
39514 case BUILT_IN_LOG10F:
39515 name[4] = 's';
39516 name[5] = '4';
39517 if (el_mode != SFmode
39518 || n != 4)
39519 return NULL_TREE;
39520 break;
39522 default:
39523 return NULL_TREE;
39526 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
39527 sprintf (name + 7, "%s", bname+10);
39529 arity = 0;
39530 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
39531 args;
39532 args = TREE_CHAIN (args))
39533 arity++;
39535 if (arity == 1)
39536 fntype = build_function_type_list (type_out, type_in, NULL);
39537 else
39538 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
39540 /* Build a function declaration for the vectorized function. */
39541 new_fndecl = build_decl (BUILTINS_LOCATION,
39542 FUNCTION_DECL, get_identifier (name), fntype);
39543 TREE_PUBLIC (new_fndecl) = 1;
39544 DECL_EXTERNAL (new_fndecl) = 1;
39545 DECL_IS_NOVOPS (new_fndecl) = 1;
39546 TREE_READONLY (new_fndecl) = 1;
39548 return new_fndecl;
39551 /* Returns a decl of a function that implements gather load with
39552 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
39553 Return NULL_TREE if it is not available. */
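/* Example (illustrative): for a V4DFmode gather through SImode
   indices with scale 8, the checks below succeed and, without
   AVX512VL, the decl for IX86_BUILTIN_GATHERALTSIV4DF is returned;
   with AVX512VL the IX86_BUILTIN_GATHER3ALTSIV4DF variant is chosen
   instead.  */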
39555 static tree
39556 ix86_vectorize_builtin_gather (const_tree mem_vectype,
39557 const_tree index_type, int scale)
39559 bool si;
39560 enum ix86_builtins code;
39562 if (! TARGET_AVX2)
39563 return NULL_TREE;
39565 if ((TREE_CODE (index_type) != INTEGER_TYPE
39566 && !POINTER_TYPE_P (index_type))
39567 || (TYPE_MODE (index_type) != SImode
39568 && TYPE_MODE (index_type) != DImode))
39569 return NULL_TREE;
39571 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
39572 return NULL_TREE;
39574 /* v*gather* insn sign extends index to pointer mode. */
39575 if (TYPE_PRECISION (index_type) < POINTER_SIZE
39576 && TYPE_UNSIGNED (index_type))
39577 return NULL_TREE;
39579 if (scale <= 0
39580 || scale > 8
39581 || (scale & (scale - 1)) != 0)
39582 return NULL_TREE;
39584 si = TYPE_MODE (index_type) == SImode;
39585 switch (TYPE_MODE (mem_vectype))
39587 case V2DFmode:
39588 if (TARGET_AVX512VL)
39589 code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
39590 else
39591 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
39592 break;
39593 case V4DFmode:
39594 if (TARGET_AVX512VL)
39595 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
39596 else
39597 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
39598 break;
39599 case V2DImode:
39600 if (TARGET_AVX512VL)
39601 code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
39602 else
39603 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
39604 break;
39605 case V4DImode:
39606 if (TARGET_AVX512VL)
39607 code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
39608 else
39609 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
39610 break;
39611 case V4SFmode:
39612 if (TARGET_AVX512VL)
39613 code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
39614 else
39615 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
39616 break;
39617 case V8SFmode:
39618 if (TARGET_AVX512VL)
39619 code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
39620 else
39621 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
39622 break;
39623 case V4SImode:
39624 if (TARGET_AVX512VL)
39625 code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
39626 else
39627 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
39628 break;
39629 case V8SImode:
39630 if (TARGET_AVX512VL)
39631 code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
39632 else
39633 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
39634 break;
39635 case V8DFmode:
39636 if (TARGET_AVX512F)
39637 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
39638 else
39639 return NULL_TREE;
39640 break;
39641 case V8DImode:
39642 if (TARGET_AVX512F)
39643 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
39644 else
39645 return NULL_TREE;
39646 break;
39647 case V16SFmode:
39648 if (TARGET_AVX512F)
39649 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
39650 else
39651 return NULL_TREE;
39652 break;
39653 case V16SImode:
39654 if (TARGET_AVX512F)
39655 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
39656 else
39657 return NULL_TREE;
39658 break;
39659 default:
39660 return NULL_TREE;
39663 return ix86_get_builtin (code);
39666 /* Returns a decl for a target-specific builtin that implements the
39667 reciprocal of the function, or NULL_TREE if it is not available. */
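/* Example (illustrative): with -ffast-math in effect (finite-only,
   non-trapping, unsafe math optimizations), a computation of
   1.0f / sqrtf (x) can be rewritten to use IX86_BUILTIN_RSQRTF below,
   i.e. a reciprocal square root approximation instead of a real
   square root followed by a division.  */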
39669 static tree
39670 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
39671 bool sqrt ATTRIBUTE_UNUSED)
39673 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
39674 && flag_finite_math_only && !flag_trapping_math
39675 && flag_unsafe_math_optimizations))
39676 return NULL_TREE;
39678 if (md_fn)
39679 /* Machine dependent builtins. */
39680 switch (fn)
39682 /* Vectorized version of sqrt to rsqrt conversion. */
39683 case IX86_BUILTIN_SQRTPS_NR:
39684 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
39686 case IX86_BUILTIN_SQRTPS_NR256:
39687 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
39689 default:
39690 return NULL_TREE;
39692 else
39693 /* Normal builtins. */
39694 switch (fn)
39696 /* Sqrt to rsqrt conversion. */
39697 case BUILT_IN_SQRTF:
39698 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
39700 default:
39701 return NULL_TREE;
39705 /* Helper for avx_vpermilps256_operand et al. This is also used by
39706 the expansion functions to turn the parallel back into a mask.
39707 The return value is 0 for no match and the imm8+1 for a match. */
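/* Worked example (illustrative): for V4SFmode each element selector
   occupies two bits, so the parallel [1 0 3 2] yields
   imm8 = 1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xb1 and the function
   returns 0xb2; any out-of-range selector makes it return 0.  */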
39710 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
39712 unsigned i, nelt = GET_MODE_NUNITS (mode);
39713 unsigned mask = 0;
39714 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
39716 if (XVECLEN (par, 0) != (int) nelt)
39717 return 0;
39719 /* Validate that all of the elements are constants, and not totally
39720 out of range. Copy the data into an integral array to make the
39721 subsequent checks easier. */
39722 for (i = 0; i < nelt; ++i)
39724 rtx er = XVECEXP (par, 0, i);
39725 unsigned HOST_WIDE_INT ei;
39727 if (!CONST_INT_P (er))
39728 return 0;
39729 ei = INTVAL (er);
39730 if (ei >= nelt)
39731 return 0;
39732 ipar[i] = ei;
39735 switch (mode)
39737 case V8DFmode:
39738 /* In the 512-bit DFmode case, we can only move elements within
39739 a 128-bit lane. First fill the second part of the mask,
39740 then fallthru. */
39741 for (i = 4; i < 6; ++i)
39743 if (ipar[i] < 4 || ipar[i] >= 6)
39744 return 0;
39745 mask |= (ipar[i] - 4) << i;
39747 for (i = 6; i < 8; ++i)
39749 if (ipar[i] < 6)
39750 return 0;
39751 mask |= (ipar[i] - 6) << i;
39753 /* FALLTHRU */
39755 case V4DFmode:
39756 /* In the 256-bit DFmode case, we can only move elements within
39757 a 128-bit lane. */
39758 for (i = 0; i < 2; ++i)
39760 if (ipar[i] >= 2)
39761 return 0;
39762 mask |= ipar[i] << i;
39764 for (i = 2; i < 4; ++i)
39766 if (ipar[i] < 2)
39767 return 0;
39768 mask |= (ipar[i] - 2) << i;
39770 break;
39772 case V16SFmode:
39773 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
39774 must mirror the permutation in the lower 256 bits. */
39775 for (i = 0; i < 8; ++i)
39776 if (ipar[i] + 8 != ipar[i + 8])
39777 return 0;
39778 /* FALLTHRU */
39780 case V8SFmode:
39781 /* In the 256-bit SFmode case, we have full freedom of
39782 movement within the low 128-bit lane, but the high 128-bit
39783 lane must mirror the exact same pattern. */
39784 for (i = 0; i < 4; ++i)
39785 if (ipar[i] + 4 != ipar[i + 4])
39786 return 0;
39787 nelt = 4;
39788 /* FALLTHRU */
39790 case V2DFmode:
39791 case V4SFmode:
39792 /* In the 128-bit case, we have full freedom in the placement of
39793 the elements from the source operand. */
39794 for (i = 0; i < nelt; ++i)
39795 mask |= ipar[i] << (i * (nelt / 2));
39796 break;
39798 default:
39799 gcc_unreachable ();
39802 /* Make sure success has a non-zero value by adding one. */
39803 return mask + 1;
39806 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
39807 the expansion functions to turn the parallel back into a mask.
39808 The return value is 0 for no match and the imm8+1 for a match. */
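/* Worked example (illustrative): for V8SFmode the parallel
   [4 5 6 7 12 13 14 15] selects the high 128-bit lane of the first
   operand and the high lane of the second, so imm8 = 0x31 and the
   function returns 0x32; halves that do not form whole lanes make it
   return 0.  */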
39811 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
39813 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
39814 unsigned mask = 0;
39815 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
39817 if (XVECLEN (par, 0) != (int) nelt)
39818 return 0;
39820 /* Validate that all of the elements are constants, and not totally
39821 out of range. Copy the data into an integral array to make the
39822 subsequent checks easier. */
39823 for (i = 0; i < nelt; ++i)
39825 rtx er = XVECEXP (par, 0, i);
39826 unsigned HOST_WIDE_INT ei;
39828 if (!CONST_INT_P (er))
39829 return 0;
39830 ei = INTVAL (er);
39831 if (ei >= 2 * nelt)
39832 return 0;
39833 ipar[i] = ei;
39836 /* Validate that the halves of the permute are halves. */
39837 for (i = 0; i < nelt2 - 1; ++i)
39838 if (ipar[i] + 1 != ipar[i + 1])
39839 return 0;
39840 for (i = nelt2; i < nelt - 1; ++i)
39841 if (ipar[i] + 1 != ipar[i + 1])
39842 return 0;
39844 /* Reconstruct the mask. */
39845 for (i = 0; i < 2; ++i)
39847 unsigned e = ipar[i * nelt2];
39848 if (e % nelt2)
39849 return 0;
39850 e /= nelt2;
39851 mask |= e << (i * 4);
39854 /* Make sure success has a non-zero value by adding one. */
39855 return mask + 1;
39858 /* Return a register priority for hard reg REGNO. */
39859 static int
39860 ix86_register_priority (int hard_regno)
39862 /* ebp and r13 as a base always want a displacement, and r12 as a
39863 base always wants an index. So discourage their usage in an
39864 address. */
39865 if (hard_regno == R12_REG || hard_regno == R13_REG)
39866 return 0;
39867 if (hard_regno == BP_REG)
39868 return 1;
39869 /* New x86-64 int registers result in bigger code size. Discourage
39870 them. */
39871 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
39872 return 2;
39873 /* New x86-64 SSE registers result in bigger code size. Discourage
39874 them. */
39875 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
39876 return 2;
39877 /* Usage of AX register results in smaller code. Prefer it. */
39878 if (hard_regno == 0)
39879 return 4;
39880 return 3;
39883 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
39885 Put float CONST_DOUBLE in the constant pool instead of fp regs.
39886 QImode must go into class Q_REGS.
39887 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
39888 movdf to do mem-to-mem moves through integer regs. */
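/* Example (illustrative): reloading a nonzero SFmode constant into an
   SSE class yields NO_REGS below, which pushes the constant into the
   constant pool and turns the reload into a memory load, while the
   all-zero constant keeps the requested class, since every class can
   load zeros.  */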
39890 static reg_class_t
39891 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
39893 enum machine_mode mode = GET_MODE (x);
39895 /* We're only allowed to return a subclass of CLASS. Many of the
39896 following checks fail for NO_REGS, so eliminate that early. */
39897 if (regclass == NO_REGS)
39898 return NO_REGS;
39900 /* All classes can load zeros. */
39901 if (x == CONST0_RTX (mode))
39902 return regclass;
39904 /* Force constants into memory if we are loading a (nonzero) constant into
39905 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
39906 instructions to load from a constant. */
39907 if (CONSTANT_P (x)
39908 && (MAYBE_MMX_CLASS_P (regclass)
39909 || MAYBE_SSE_CLASS_P (regclass)
39910 || MAYBE_MASK_CLASS_P (regclass)))
39911 return NO_REGS;
39913 /* Prefer SSE regs only, if we can use them for math. */
39914 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
39915 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
39917 /* Floating-point constants need more complex checks. */
39918 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
39920 /* General regs can load everything. */
39921 if (reg_class_subset_p (regclass, GENERAL_REGS))
39922 return regclass;
39924 /* Floats can load 0 and 1 plus some others. Note that we eliminated
39925 zero above. We only want to wind up preferring 80387 registers if
39926 we plan on doing computation with them. */
39927 if (TARGET_80387
39928 && standard_80387_constant_p (x) > 0)
39930 /* Limit class to non-sse. */
39931 if (regclass == FLOAT_SSE_REGS)
39932 return FLOAT_REGS;
39933 if (regclass == FP_TOP_SSE_REGS)
39934 return FP_TOP_REG;
39935 if (regclass == FP_SECOND_SSE_REGS)
39936 return FP_SECOND_REG;
39937 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
39938 return regclass;
39941 return NO_REGS;
39944 /* Generally when we see PLUS here, it's the function invariant
39945 (plus soft-fp const_int). Which can only be computed into general
39946 regs. */
39947 if (GET_CODE (x) == PLUS)
39948 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
39950 /* QImode constants are easy to load, but non-constant QImode data
39951 must go into Q_REGS. */
39952 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
39954 if (reg_class_subset_p (regclass, Q_REGS))
39955 return regclass;
39956 if (reg_class_subset_p (Q_REGS, regclass))
39957 return Q_REGS;
39958 return NO_REGS;
39961 return regclass;
39964 /* Discourage putting floating-point values in SSE registers unless
39965 SSE math is being used, and likewise for the 387 registers. */
39966 static reg_class_t
39967 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
39969 enum machine_mode mode = GET_MODE (x);
39971 /* Restrict the output reload class to the register bank that we are doing
39972 math on. If we would like not to return a subset of CLASS, reject this
39973 alternative: if reload cannot do this, it will still use its choice. */
39974 mode = GET_MODE (x);
39975 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
39976 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
39978 if (X87_FLOAT_MODE_P (mode))
39980 if (regclass == FP_TOP_SSE_REGS)
39981 return FP_TOP_REG;
39982 else if (regclass == FP_SECOND_SSE_REGS)
39983 return FP_SECOND_REG;
39984 else
39985 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
39988 return regclass;
39991 static reg_class_t
39992 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
39993 enum machine_mode mode, secondary_reload_info *sri)
39995 /* Double-word spills from general registers to non-offsettable memory
39996 references (zero-extended addresses) require special handling. */
39997 if (TARGET_64BIT
39998 && MEM_P (x)
39999 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
40000 && INTEGER_CLASS_P (rclass)
40001 && !offsettable_memref_p (x))
40003 sri->icode = (in_p
40004 ? CODE_FOR_reload_noff_load
40005 : CODE_FOR_reload_noff_store);
40006 /* Add the cost of moving address to a temporary. */
40007 sri->extra_cost = 1;
40009 return NO_REGS;
40012 /* QImode spills from non-QI registers require an
40013 intermediate register on 32-bit targets. */
40014 if (mode == QImode
40015 && (MAYBE_MASK_CLASS_P (rclass)
40016 || (!TARGET_64BIT && !in_p
40017 && INTEGER_CLASS_P (rclass)
40018 && MAYBE_NON_Q_CLASS_P (rclass))))
40020 int regno;
40022 if (REG_P (x))
40023 regno = REGNO (x);
40024 else
40025 regno = -1;
40027 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
40028 regno = true_regnum (x);
40030 /* Return Q_REGS if the operand is in memory. */
40031 if (regno == -1)
40032 return Q_REGS;
40035 /* This condition handles a corner case where an expression involving
40036 pointers gets vectorized. We're trying to use the address of a
40037 stack slot as a vector initializer.
40039 (set (reg:V2DI 74 [ vect_cst_.2 ])
40040 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
40042 Eventually frame gets turned into sp+offset like this:
40044 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
40045 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
40046 (const_int 392 [0x188]))))
40048 That later gets turned into:
40050 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
40051 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
40052 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
40054 We'll have the following reload recorded:
40056 Reload 0: reload_in (DI) =
40057 (plus:DI (reg/f:DI 7 sp)
40058 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
40059 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
40060 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
40061 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
40062 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
40063 reload_reg_rtx: (reg:V2DI 22 xmm1)
40065 Which isn't going to work since SSE instructions can't handle scalar
40066 additions. Returning GENERAL_REGS forces the addition into an integer
40067 register, and reload can handle subsequent reloads without problems. */
40069 if (in_p && GET_CODE (x) == PLUS
40070 && SSE_CLASS_P (rclass)
40071 && SCALAR_INT_MODE_P (mode))
40072 return GENERAL_REGS;
40074 return NO_REGS;
40077 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
40079 static bool
40080 ix86_class_likely_spilled_p (reg_class_t rclass)
40082 switch (rclass)
40084 case AREG:
40085 case DREG:
40086 case CREG:
40087 case BREG:
40088 case AD_REGS:
40089 case SIREG:
40090 case DIREG:
40091 case SSE_FIRST_REG:
40092 case FP_TOP_REG:
40093 case FP_SECOND_REG:
40094 return true;
40096 default:
40097 break;
40100 return false;
40103 /* If we are copying between general and FP registers, we need a memory
40104 location. The same is true for SSE and MMX registers.
40106 To optimize register_move_cost performance, an inline variant is provided.
40108 The macro can't work reliably when one of the CLASSES is a class containing
40109 registers from multiple units (SSE, MMX, integer). We avoid this by never
40110 combining those units in a single alternative in the machine description.
40111 Ensure that this constraint holds to avoid unexpected surprises.
40113 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
40114 enforce these sanity checks. */
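/* Example (illustrative): on a 32-bit target a DImode copy between
   SSE_REGS and GENERAL_REGS is reported as needing secondary memory
   below, because GET_MODE_SIZE (DImode) > UNITS_PER_WORD there; an
   SImode copy between the same classes is not, provided SSE2 and the
   inter-unit move tunables allow direct moves.  */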
40116 static inline bool
40117 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
40118 enum machine_mode mode, int strict)
40120 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
40121 return false;
40122 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
40123 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
40124 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
40125 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
40126 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
40127 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
40129 gcc_assert (!strict || lra_in_progress);
40130 return true;
40133 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
40134 return true;
40136 /* Between mask and general, we have moves no larger than word size. */
40137 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
40138 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
40139 return true;
40141 /* ??? This is a lie. We do have moves between mmx/general and between
40142 mmx/sse2. But by saying we need secondary memory we discourage the
40143 register allocator from using the mmx registers unless needed. */
40144 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
40145 return true;
40147 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
40149 /* SSE1 doesn't have any direct moves from other classes. */
40150 if (!TARGET_SSE2)
40151 return true;
40153 /* If the target says that inter-unit moves are more expensive
40154 than moving through memory, then don't generate them. */
40155 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
40156 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
40157 return true;
40159 /* Between SSE and general, we have moves no larger than word size. */
40160 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
40161 return true;
40164 return false;
40167 bool
40168 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
40169 enum machine_mode mode, int strict)
40171 return inline_secondary_memory_needed (class1, class2, mode, strict);
40174 /* Implement the TARGET_CLASS_MAX_NREGS hook.
40176 On the 80386, this is the size of MODE in words,
40177 except in the FP regs, where a single reg is always enough. */
40179 static unsigned char
40180 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
40182 if (MAYBE_INTEGER_CLASS_P (rclass))
40184 if (mode == XFmode)
40185 return (TARGET_64BIT ? 2 : 3);
40186 else if (mode == XCmode)
40187 return (TARGET_64BIT ? 4 : 6);
40188 else
40189 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
40191 else
40193 if (COMPLEX_MODE_P (mode))
40194 return 2;
40195 else
40196 return 1;
40200 /* Return true if the registers in CLASS cannot represent the change from
40201 modes FROM to TO. */
40203 bool
40204 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
40205 enum reg_class regclass)
40207 if (from == to)
40208 return false;
40210 /* x87 registers can't do subreg at all, as all values are reformatted
40211 to extended precision. */
40212 if (MAYBE_FLOAT_CLASS_P (regclass))
40213 return true;
40215 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
40217 /* Vector registers do not support QI or HImode loads. If we don't
40218 disallow a change to these modes, reload will assume it's ok to
40219 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
40220 the vec_dupv4hi pattern. */
40221 if (GET_MODE_SIZE (from) < 4)
40222 return true;
40224 /* Vector registers do not support subreg with nonzero offsets, which
40225 are otherwise valid for integer registers. Since we can't see
40226 whether we have a nonzero offset from here, prohibit all
40227 nonparadoxical subregs changing size. */
40228 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
40229 return true;
40232 return false;
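/* Example of the restriction above (illustrative): accessing a V4SImode
   register as QImode is rejected for SSE and MMX classes by the
   GET_MODE_SIZE (to) < GET_MODE_SIZE (from) check, so such a narrowing
   must go through an integer register or memory rather than reinterpreting
   the vector register in place.  */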
40235 /* Return the cost of moving data of mode M between a
40236 register and memory. A value of 2 is the default; this cost is
40237 relative to those in `REGISTER_MOVE_COST'.
40239 This function is used extensively by register_move_cost, which is used
40240 to build tables at startup. Keep it inline for that reason.
40241 When IN is 2, return the maximum of the in and out move costs.
40243 If moving between registers and memory is more expensive than
40244 between two registers, you should define this macro to express the
40245 relative cost.
40247 Also model the increased cost of moving QImode registers in non-Q_REGS
40248 classes. */
40250 static inline int
40251 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
40252 int in)
40254 int cost;
40255 if (FLOAT_CLASS_P (regclass))
40257 int index;
40258 switch (mode)
40260 case SFmode:
40261 index = 0;
40262 break;
40263 case DFmode:
40264 index = 1;
40265 break;
40266 case XFmode:
40267 index = 2;
40268 break;
40269 default:
40270 return 100;
40272 if (in == 2)
40273 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
40274 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
40276 if (SSE_CLASS_P (regclass))
40278 int index;
40279 switch (GET_MODE_SIZE (mode))
40281 case 4:
40282 index = 0;
40283 break;
40284 case 8:
40285 index = 1;
40286 break;
40287 case 16:
40288 index = 2;
40289 break;
40290 default:
40291 return 100;
40293 if (in == 2)
40294 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
40295 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
40297 if (MMX_CLASS_P (regclass))
40299 int index;
40300 switch (GET_MODE_SIZE (mode))
40302 case 4:
40303 index = 0;
40304 break;
40305 case 8:
40306 index = 1;
40307 break;
40308 default:
40309 return 100;
40311 if (in == 2)
40312 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
40313 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
40315 switch (GET_MODE_SIZE (mode))
40317 case 1:
40318 if (Q_CLASS_P (regclass) || TARGET_64BIT)
40320 if (!in)
40321 return ix86_cost->int_store[0];
40322 if (TARGET_PARTIAL_REG_DEPENDENCY
40323 && optimize_function_for_speed_p (cfun))
40324 cost = ix86_cost->movzbl_load;
40325 else
40326 cost = ix86_cost->int_load[0];
40327 if (in == 2)
40328 return MAX (cost, ix86_cost->int_store[0]);
40329 return cost;
40331 else
40333 if (in == 2)
40334 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
40335 if (in)
40336 return ix86_cost->movzbl_load;
40337 else
40338 return ix86_cost->int_store[0] + 4;
40340 break;
40341 case 2:
40342 if (in == 2)
40343 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
40344 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
40345 default:
40346 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
40347 if (mode == TFmode)
40348 mode = XFmode;
40349 if (in == 2)
40350 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
40351 else if (in)
40352 cost = ix86_cost->int_load[2];
40353 else
40354 cost = ix86_cost->int_store[2];
40355 return (cost * (((int) GET_MODE_SIZE (mode)
40356 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
40360 static int
40361 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
40362 bool in)
40364 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
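/* Usage sketch (hypothetical numbers): the IN argument selects the
   direction - false means a store (register -> memory), true a load
   (memory -> register); only the inline helper above additionally accepts
   2 to mean "the maximum of load and store".  So on a target whose cost
   table had sse_load[2] == 12 and sse_store[2] == 10, a 16-byte SSE mode
   would report 12 for loads and 10 for stores.  */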
40368 /* Return the cost of moving data from a register in class CLASS1 to
40369 one in class CLASS2.
40371 It is not required that the cost always equal 2 when FROM is the same as TO;
40372 on some machines it is expensive to move between registers if they are not
40373 general registers. */
40375 static int
40376 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
40377 reg_class_t class2_i)
40379 enum reg_class class1 = (enum reg_class) class1_i;
40380 enum reg_class class2 = (enum reg_class) class2_i;
40382 /* In case we require secondary memory, compute the cost of the store
40383 followed by the load. In order to avoid bad register allocation choices,
40384 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
40386 if (inline_secondary_memory_needed (class1, class2, mode, 0))
40388 int cost = 1;
40390 cost += inline_memory_move_cost (mode, class1, 2);
40391 cost += inline_memory_move_cost (mode, class2, 2);
40393 /* When copying from a general-purpose register we may emit multiple
40394 stores followed by a single load, causing a memory-size-mismatch stall.
40395 Count this as an arbitrarily high cost of 20. */
40396 if (targetm.class_max_nregs (class1, mode)
40397 > targetm.class_max_nregs (class2, mode))
40398 cost += 20;
40400 /* In the case of FP/MMX moves, the registers actually overlap, and we
40401 have to switch modes in order to treat them differently. */
40402 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
40403 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
40404 cost += 20;
40406 return cost;
40409 /* Moves between SSE/MMX and integer unit are expensive. */
40410 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
40411 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
40413 /* ??? By keeping the returned value relatively high, we limit the number
40414 of moves between integer and MMX/SSE registers for all targets.
40415 Additionally, a high value avoids a problem with x86_modes_tieable_p(),
40416 where integer modes in MMX/SSE registers are not tieable
40417 because of missing QImode and HImode moves to, from or between
40418 MMX/SSE registers. */
40419 return MAX (8, ix86_cost->mmxsse_to_integer);
40421 if (MAYBE_FLOAT_CLASS_P (class1))
40422 return ix86_cost->fp_move;
40423 if (MAYBE_SSE_CLASS_P (class1))
40424 return ix86_cost->sse_move;
40425 if (MAYBE_MMX_CLASS_P (class1))
40426 return ix86_cost->mmx_move;
40427 return 2;
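/* Cost sketch for the secondary-memory path above (illustrative): a DImode
   copy between an integer and an SSE register on a 32-bit target is priced
   roughly as
       1 + inline_memory_move_cost (DImode, GENERAL_REGS, 2)
         + inline_memory_move_cost (DImode, SSE_REGS, 2),
   plus an extra 20 when the source class needs more hard registers than the
   destination (two GPRs feeding one XMM register), which keeps such copies
   expensive enough that the allocator prefers a single register file.  */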
40430 /* Return TRUE if hard register REGNO can hold a value of machine-mode
40431 MODE. */
40433 bool
40434 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
40436 /* The flags register, and only the flags register, can hold CCmode values. */
40437 if (CC_REGNO_P (regno))
40438 return GET_MODE_CLASS (mode) == MODE_CC;
40439 if (GET_MODE_CLASS (mode) == MODE_CC
40440 || GET_MODE_CLASS (mode) == MODE_RANDOM
40441 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
40442 return false;
40443 if (STACK_REGNO_P (regno))
40444 return VALID_FP_MODE_P (mode);
40445 if (MASK_REGNO_P (regno))
40446 return (VALID_MASK_REG_MODE (mode)
40447 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
40448 if (SSE_REGNO_P (regno))
40450 /* We implement the move patterns for all vector modes into and
40451 out of SSE registers, even when no operation instructions
40452 are available. */
40454 /* For AVX-512 we allow, regardless of regno:
40455 - XI mode
40456 - any of 512-bit wide vector mode
40457 - any scalar mode. */
40458 if (TARGET_AVX512F
40459 && (mode == XImode
40460 || VALID_AVX512F_REG_MODE (mode)
40461 || VALID_AVX512F_SCALAR_MODE (mode)))
40462 return true;
40464 /* TODO: check for QI/HI scalars. */
40465 /* AVX-512VL allows SSE registers 16+ for 128/256-bit modes. */
40466 if (TARGET_AVX512VL
40467 && (mode == OImode
40468 || mode == TImode
40469 || VALID_AVX256_REG_MODE (mode)
40470 || VALID_AVX512VL_128_REG_MODE (mode)))
40471 return true;
40473 /* xmm16-xmm31 are only available with AVX-512. */
40474 if (EXT_REX_SSE_REGNO_P (regno))
40475 return false;
40477 /* OImode and AVX modes are available only when AVX is enabled. */
40478 return ((TARGET_AVX
40479 && VALID_AVX256_REG_OR_OI_MODE (mode))
40480 || VALID_SSE_REG_MODE (mode)
40481 || VALID_SSE2_REG_MODE (mode)
40482 || VALID_MMX_REG_MODE (mode)
40483 || VALID_MMX_REG_MODE_3DNOW (mode));
40485 if (MMX_REGNO_P (regno))
40487 /* We implement the move patterns for 3DNOW modes even in MMX mode,
40488 so if the register is available at all, then we can move data of
40489 the given mode into or out of it. */
40490 return (VALID_MMX_REG_MODE (mode)
40491 || VALID_MMX_REG_MODE_3DNOW (mode));
40494 if (mode == QImode)
40496 /* Take care with QImode values - they can live in non-QI regs,
40497 but then they do cause partial register stalls. */
40498 if (ANY_QI_REGNO_P (regno))
40499 return true;
40500 if (!TARGET_PARTIAL_REG_STALL)
40501 return true;
40502 /* LRA checks if the hard register is OK for the given mode.
40503 QImode values can live in non-QI regs, so we allow all
40504 registers here. */
40505 if (lra_in_progress)
40506 return true;
40507 return !can_create_pseudo_p ();
40509 /* We handle both integers and floats in the general purpose registers. */
40510 else if (VALID_INT_MODE_P (mode))
40511 return true;
40512 else if (VALID_FP_MODE_P (mode))
40513 return true;
40514 else if (VALID_DFP_MODE_P (mode))
40515 return true;
40516 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
40517 on to use that value in smaller contexts, this can easily force a
40518 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
40519 supporting DImode, allow it. */
40520 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
40521 return true;
40523 return false;
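/* Quick sanity examples (illustrative): any xmm register can hold V16SFmode
   once TARGET_AVX512F is set; %xmm16 can hold V8SFmode only when
   TARGET_AVX512VL is also set (the 128/256-bit case above); and outside the
   AVX-512 cases, registers xmm16-xmm31 reject every mode and fall out at the
   EXT_REX_SSE_REGNO_P check.  */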
40526 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
40527 tieable integer mode. */
40529 static bool
40530 ix86_tieable_integer_mode_p (enum machine_mode mode)
40532 switch (mode)
40534 case HImode:
40535 case SImode:
40536 return true;
40538 case QImode:
40539 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
40541 case DImode:
40542 return TARGET_64BIT;
40544 default:
40545 return false;
40549 /* Return true if MODE1 is accessible in a register that can hold MODE2
40550 without copying. That is, all register classes that can hold MODE2
40551 can also hold MODE1. */
40553 bool
40554 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
40556 if (mode1 == mode2)
40557 return true;
40559 if (ix86_tieable_integer_mode_p (mode1)
40560 && ix86_tieable_integer_mode_p (mode2))
40561 return true;
40563 /* MODE2 being XFmode implies fp stack or general regs, which means we
40564 can tie any smaller floating point modes to it. Note that we do not
40565 tie this with TFmode. */
40566 if (mode2 == XFmode)
40567 return mode1 == SFmode || mode1 == DFmode;
40569 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
40570 that we can tie it with SFmode. */
40571 if (mode2 == DFmode)
40572 return mode1 == SFmode;
40574 /* If MODE2 is only appropriate for an SSE register, then tie with
40575 any other mode acceptable to SSE registers. */
40576 if (GET_MODE_SIZE (mode2) == 32
40577 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40578 return (GET_MODE_SIZE (mode1) == 32
40579 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40580 if (GET_MODE_SIZE (mode2) == 16
40581 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
40582 return (GET_MODE_SIZE (mode1) == 16
40583 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
40585 /* If MODE2 is appropriate for an MMX register, then tie
40586 with any other mode acceptable to MMX registers. */
40587 if (GET_MODE_SIZE (mode2) == 8
40588 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
40589 return (GET_MODE_SIZE (mode1) == 8
40590 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
40592 return false;
40595 /* Return the cost of moving between two registers of mode MODE. */
40597 static int
40598 ix86_set_reg_reg_cost (enum machine_mode mode)
40600 unsigned int units = UNITS_PER_WORD;
40602 switch (GET_MODE_CLASS (mode))
40604 default:
40605 break;
40607 case MODE_CC:
40608 units = GET_MODE_SIZE (CCmode);
40609 break;
40611 case MODE_FLOAT:
40612 if ((TARGET_SSE && mode == TFmode)
40613 || (TARGET_80387 && mode == XFmode)
40614 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
40615 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
40616 units = GET_MODE_SIZE (mode);
40617 break;
40619 case MODE_COMPLEX_FLOAT:
40620 if ((TARGET_SSE && mode == TCmode)
40621 || (TARGET_80387 && mode == XCmode)
40622 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
40623 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
40624 units = GET_MODE_SIZE (mode);
40625 break;
40627 case MODE_VECTOR_INT:
40628 case MODE_VECTOR_FLOAT:
40629 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
40630 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
40631 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
40632 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
40633 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
40634 units = GET_MODE_SIZE (mode);
40637 /* Return the cost of moving between two registers of mode MODE,
40638 assuming that the move will be in pieces of at most UNITS bytes. */
40639 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
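/* Worked example (illustrative): with TARGET_AVX set, a V8SFmode copy is
   assumed to move in one 32-byte piece, so the result is
   COSTS_N_INSNS ((32 + 32 - 1) / 32) == COSTS_N_INSNS (1); without a
   matching vector target flag, UNITS stays at UNITS_PER_WORD and the same
   copy is priced as several word-sized moves.  */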
40642 /* Compute a (partial) cost for rtx X. Return true if the complete
40643 cost has been computed, and false if subexpressions should be
40644 scanned. In either case, *TOTAL contains the cost result. */
40646 static bool
40647 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
40648 bool speed)
40650 rtx mask;
40651 enum rtx_code code = (enum rtx_code) code_i;
40652 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
40653 enum machine_mode mode = GET_MODE (x);
40654 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
40656 switch (code)
40658 case SET:
40659 if (register_operand (SET_DEST (x), VOIDmode)
40660 && reg_or_0_operand (SET_SRC (x), VOIDmode))
40662 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
40663 return true;
40665 return false;
40667 case CONST_INT:
40668 case CONST:
40669 case LABEL_REF:
40670 case SYMBOL_REF:
40671 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
40672 *total = 3;
40673 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
40674 *total = 2;
40675 else if (flag_pic && SYMBOLIC_CONST (x)
40676 && !(TARGET_64BIT
40677 && (GET_CODE (x) == LABEL_REF
40678 || (GET_CODE (x) == SYMBOL_REF
40679 && SYMBOL_REF_LOCAL_P (x)))))
40680 *total = 1;
40681 else
40682 *total = 0;
40683 return true;
40685 case CONST_DOUBLE:
40686 if (mode == VOIDmode)
40688 *total = 0;
40689 return true;
40691 switch (standard_80387_constant_p (x))
40693 case 1: /* 0.0 */
40694 *total = 1;
40695 return true;
40696 default: /* Other constants */
40697 *total = 2;
40698 return true;
40699 case 0:
40700 case -1:
40701 break;
40703 if (SSE_FLOAT_MODE_P (mode))
40705 case CONST_VECTOR:
40706 switch (standard_sse_constant_p (x))
40708 case 0:
40709 break;
40710 case 1: /* 0: xor eliminates false dependency */
40711 *total = 0;
40712 return true;
40713 default: /* -1: cmp contains false dependency */
40714 *total = 1;
40715 return true;
40718 /* Fall back to (MEM (SYMBOL_REF)), since that's where
40719 it'll probably end up. Add a penalty for size. */
40720 *total = (COSTS_N_INSNS (1)
40721 + (flag_pic != 0 && !TARGET_64BIT)
40722 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
40723 return true;
40725 case ZERO_EXTEND:
40726 /* The zero extension is often completely free on x86_64, so make
40727 it as cheap as possible. */
40728 if (TARGET_64BIT && mode == DImode
40729 && GET_MODE (XEXP (x, 0)) == SImode)
40730 *total = 1;
40731 else if (TARGET_ZERO_EXTEND_WITH_AND)
40732 *total = cost->add;
40733 else
40734 *total = cost->movzx;
40735 return false;
40737 case SIGN_EXTEND:
40738 *total = cost->movsx;
40739 return false;
40741 case ASHIFT:
40742 if (SCALAR_INT_MODE_P (mode)
40743 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
40744 && CONST_INT_P (XEXP (x, 1)))
40746 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
40747 if (value == 1)
40749 *total = cost->add;
40750 return false;
40752 if ((value == 2 || value == 3)
40753 && cost->lea <= cost->shift_const)
40755 *total = cost->lea;
40756 return false;
40759 /* FALLTHRU */
40761 case ROTATE:
40762 case ASHIFTRT:
40763 case LSHIFTRT:
40764 case ROTATERT:
40765 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40767 /* ??? Should be SSE vector operation cost. */
40768 /* At least for published AMD latencies, this really is the same
40769 as the latency for a simple fpu operation like fabs. */
40770 /* V*QImode is emulated with 1-11 insns. */
40771 if (mode == V16QImode || mode == V32QImode)
40773 int count = 11;
40774 if (TARGET_XOP && mode == V16QImode)
40776 /* For XOP we use vpshab, which requires a broadcast of the
40777 value to the variable shift insn. For constants this
40778 means a V16QI constant in memory; even when we can perform the
40779 shift with one insn, set the cost to prefer paddb. */
40780 if (CONSTANT_P (XEXP (x, 1)))
40782 *total = (cost->fabs
40783 + rtx_cost (XEXP (x, 0), code, 0, speed)
40784 + (speed ? 2 : COSTS_N_BYTES (16)));
40785 return true;
40787 count = 3;
40789 else if (TARGET_SSSE3)
40790 count = 7;
40791 *total = cost->fabs * count;
40793 else
40794 *total = cost->fabs;
40796 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
40798 if (CONST_INT_P (XEXP (x, 1)))
40800 if (INTVAL (XEXP (x, 1)) > 32)
40801 *total = cost->shift_const + COSTS_N_INSNS (2);
40802 else
40803 *total = cost->shift_const * 2;
40805 else
40807 if (GET_CODE (XEXP (x, 1)) == AND)
40808 *total = cost->shift_var * 2;
40809 else
40810 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
40813 else
40815 if (CONST_INT_P (XEXP (x, 1)))
40816 *total = cost->shift_const;
40817 else if (GET_CODE (XEXP (x, 1)) == SUBREG
40818 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
40820 /* Return the cost after shift-and truncation. */
40821 *total = cost->shift_var;
40822 return true;
40824 else
40825 *total = cost->shift_var;
40827 return false;
40829 case FMA:
40831 rtx sub;
40833 gcc_assert (FLOAT_MODE_P (mode));
40834 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
40836 /* ??? SSE scalar/vector cost should be used here. */
40837 /* ??? Bald assumption that fma has the same cost as fmul. */
40838 *total = cost->fmul;
40839 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
40841 /* A negation in op0 or op2 is free: FMS, FNMA, FNMS. */
40842 sub = XEXP (x, 0);
40843 if (GET_CODE (sub) == NEG)
40844 sub = XEXP (sub, 0);
40845 *total += rtx_cost (sub, FMA, 0, speed);
40847 sub = XEXP (x, 2);
40848 if (GET_CODE (sub) == NEG)
40849 sub = XEXP (sub, 0);
40850 *total += rtx_cost (sub, FMA, 2, speed);
40851 return true;
40854 case MULT:
40855 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40857 /* ??? SSE scalar cost should be used here. */
40858 *total = cost->fmul;
40859 return false;
40861 else if (X87_FLOAT_MODE_P (mode))
40863 *total = cost->fmul;
40864 return false;
40866 else if (FLOAT_MODE_P (mode))
40868 /* ??? SSE vector cost should be used here. */
40869 *total = cost->fmul;
40870 return false;
40872 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
40874 /* V*QImode is emulated with 7-13 insns. */
40875 if (mode == V16QImode || mode == V32QImode)
40877 int extra = 11;
40878 if (TARGET_XOP && mode == V16QImode)
40879 extra = 5;
40880 else if (TARGET_SSSE3)
40881 extra = 6;
40882 *total = cost->fmul * 2 + cost->fabs * extra;
40884 /* V*DImode is emulated with 5-8 insns. */
40885 else if (mode == V2DImode || mode == V4DImode)
40887 if (TARGET_XOP && mode == V2DImode)
40888 *total = cost->fmul * 2 + cost->fabs * 3;
40889 else
40890 *total = cost->fmul * 3 + cost->fabs * 5;
40892 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
40893 insns, including two PMULUDQ. */
40894 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
40895 *total = cost->fmul * 2 + cost->fabs * 5;
40896 else
40897 *total = cost->fmul;
40898 return false;
40900 else
40902 rtx op0 = XEXP (x, 0);
40903 rtx op1 = XEXP (x, 1);
40904 int nbits;
40905 if (CONST_INT_P (XEXP (x, 1)))
40907 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
40908 for (nbits = 0; value != 0; value &= value - 1)
40909 nbits++;
40911 else
40912 /* This is arbitrary. */
40913 nbits = 7;
40915 /* Compute costs correctly for widening multiplication. */
40916 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
40917 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
40918 == GET_MODE_SIZE (mode))
40920 int is_mulwiden = 0;
40921 enum machine_mode inner_mode = GET_MODE (op0);
40923 if (GET_CODE (op0) == GET_CODE (op1))
40924 is_mulwiden = 1, op1 = XEXP (op1, 0);
40925 else if (CONST_INT_P (op1))
40927 if (GET_CODE (op0) == SIGN_EXTEND)
40928 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
40929 == INTVAL (op1);
40930 else
40931 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
40934 if (is_mulwiden)
40935 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
40938 *total = (cost->mult_init[MODE_INDEX (mode)]
40939 + nbits * cost->mult_bit
40940 + rtx_cost (op0, outer_code, opno, speed)
40941 + rtx_cost (op1, outer_code, opno, speed));
40943 return true;
40946 case DIV:
40947 case UDIV:
40948 case MOD:
40949 case UMOD:
40950 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
40951 /* ??? SSE cost should be used here. */
40952 *total = cost->fdiv;
40953 else if (X87_FLOAT_MODE_P (mode))
40954 *total = cost->fdiv;
40955 else if (FLOAT_MODE_P (mode))
40956 /* ??? SSE vector cost should be used here. */
40957 *total = cost->fdiv;
40958 else
40959 *total = cost->divide[MODE_INDEX (mode)];
40960 return false;
40962 case PLUS:
40963 if (GET_MODE_CLASS (mode) == MODE_INT
40964 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
40966 if (GET_CODE (XEXP (x, 0)) == PLUS
40967 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
40968 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
40969 && CONSTANT_P (XEXP (x, 1)))
40971 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
40972 if (val == 2 || val == 4 || val == 8)
40974 *total = cost->lea;
40975 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
40976 outer_code, opno, speed);
40977 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
40978 outer_code, opno, speed);
40979 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
40980 return true;
40983 else if (GET_CODE (XEXP (x, 0)) == MULT
40984 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
40986 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
40987 if (val == 2 || val == 4 || val == 8)
40989 *total = cost->lea;
40990 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
40991 outer_code, opno, speed);
40992 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
40993 return true;
40996 else if (GET_CODE (XEXP (x, 0)) == PLUS)
40998 *total = cost->lea;
40999 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
41000 outer_code, opno, speed);
41001 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
41002 outer_code, opno, speed);
41003 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
41004 return true;
41007 /* FALLTHRU */
41009 case MINUS:
41010 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41012 /* ??? SSE cost should be used here. */
41013 *total = cost->fadd;
41014 return false;
41016 else if (X87_FLOAT_MODE_P (mode))
41018 *total = cost->fadd;
41019 return false;
41021 else if (FLOAT_MODE_P (mode))
41023 /* ??? SSE vector cost should be used here. */
41024 *total = cost->fadd;
41025 return false;
41027 /* FALLTHRU */
41029 case AND:
41030 case IOR:
41031 case XOR:
41032 if (GET_MODE_CLASS (mode) == MODE_INT
41033 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41035 *total = (cost->add * 2
41036 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
41037 << (GET_MODE (XEXP (x, 0)) != DImode))
41038 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
41039 << (GET_MODE (XEXP (x, 1)) != DImode)));
41040 return true;
41042 /* FALLTHRU */
41044 case NEG:
41045 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41047 /* ??? SSE cost should be used here. */
41048 *total = cost->fchs;
41049 return false;
41051 else if (X87_FLOAT_MODE_P (mode))
41053 *total = cost->fchs;
41054 return false;
41056 else if (FLOAT_MODE_P (mode))
41058 /* ??? SSE vector cost should be used here. */
41059 *total = cost->fchs;
41060 return false;
41062 /* FALLTHRU */
41064 case NOT:
41065 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
41067 /* ??? Should be SSE vector operation cost. */
41068 /* At least for published AMD latencies, this really is the same
41069 as the latency for a simple fpu operation like fabs. */
41070 *total = cost->fabs;
41072 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41073 *total = cost->add * 2;
41074 else
41075 *total = cost->add;
41076 return false;
41078 case COMPARE:
41079 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
41080 && XEXP (XEXP (x, 0), 1) == const1_rtx
41081 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
41082 && XEXP (x, 1) == const0_rtx)
41084 /* This kind of construct is implemented using test[bwl].
41085 Treat it as if we had an AND. */
41086 *total = (cost->add
41087 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
41088 + rtx_cost (const1_rtx, outer_code, opno, speed));
41089 return true;
41091 return false;
41093 case FLOAT_EXTEND:
41094 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
41095 *total = 0;
41096 return false;
41098 case ABS:
41099 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41100 /* ??? SSE cost should be used here. */
41101 *total = cost->fabs;
41102 else if (X87_FLOAT_MODE_P (mode))
41103 *total = cost->fabs;
41104 else if (FLOAT_MODE_P (mode))
41105 /* ??? SSE vector cost should be used here. */
41106 *total = cost->fabs;
41107 return false;
41109 case SQRT:
41110 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
41111 /* ??? SSE cost should be used here. */
41112 *total = cost->fsqrt;
41113 else if (X87_FLOAT_MODE_P (mode))
41114 *total = cost->fsqrt;
41115 else if (FLOAT_MODE_P (mode))
41116 /* ??? SSE vector cost should be used here. */
41117 *total = cost->fsqrt;
41118 return false;
41120 case UNSPEC:
41121 if (XINT (x, 1) == UNSPEC_TP)
41122 *total = 0;
41123 return false;
41125 case VEC_SELECT:
41126 case VEC_CONCAT:
41127 case VEC_DUPLICATE:
41128 /* ??? Assume all of these vector manipulation patterns are
41129 recognizable, in which case they all pretty much have the
41130 same cost. */
41131 *total = cost->fabs;
41132 return true;
41133 case VEC_MERGE:
41134 mask = XEXP (x, 2);
41135 /* This is a masked instruction; assume the same cost
41136 as the non-masked variant. */
41137 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
41138 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
41139 else
41140 *total = cost->fabs;
41141 return true;
41143 default:
41144 return false;
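/* Costing sketch for the PLUS handling above (illustrative): an address-like
   expression such as (plus (mult (reg) (const_int 4)) (reg)) is priced as a
   single LEA (cost->lea) plus the cost of its operands, which encourages
   passes such as combine to keep base + index*scale computations in one insn
   rather than splitting them into shifts and adds.  */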
41148 #if TARGET_MACHO
41150 static int current_machopic_label_num;
41152 /* Given a symbol name and its associated stub, write out the
41153 definition of the stub. */
41155 void
41156 machopic_output_stub (FILE *file, const char *symb, const char *stub)
41158 unsigned int length;
41159 char *binder_name, *symbol_name, lazy_ptr_name[32];
41160 int label = ++current_machopic_label_num;
41162 /* For 64-bit we shouldn't get here. */
41163 gcc_assert (!TARGET_64BIT);
41165 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
41166 symb = targetm.strip_name_encoding (symb);
41168 length = strlen (stub);
41169 binder_name = XALLOCAVEC (char, length + 32);
41170 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
41172 length = strlen (symb);
41173 symbol_name = XALLOCAVEC (char, length + 32);
41174 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
41176 sprintf (lazy_ptr_name, "L%d$lz", label);
41178 if (MACHOPIC_ATT_STUB)
41179 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
41180 else if (MACHOPIC_PURE)
41181 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
41182 else
41183 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
41185 fprintf (file, "%s:\n", stub);
41186 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
41188 if (MACHOPIC_ATT_STUB)
41190 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
41192 else if (MACHOPIC_PURE)
41194 /* PIC stub. */
41195 /* 25-byte PIC stub using "CALL get_pc_thunk". */
41196 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
41197 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
41198 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
41199 label, lazy_ptr_name, label);
41200 fprintf (file, "\tjmp\t*%%ecx\n");
41202 else
41203 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
41205 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
41206 it needs no stub-binding-helper. */
41207 if (MACHOPIC_ATT_STUB)
41208 return;
41210 fprintf (file, "%s:\n", binder_name);
41212 if (MACHOPIC_PURE)
41214 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
41215 fprintf (file, "\tpushl\t%%ecx\n");
41217 else
41218 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
41220 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
41222 /* N.B. Keep the correspondence of these
41223 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
41224 old-pic/new-pic/non-pic stubs; altering this will break
41225 compatibility with existing dylibs. */
41226 if (MACHOPIC_PURE)
41228 /* 25-byte PIC stub using "CALL get_pc_thunk". */
41229 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
41231 else
41232 /* 16-byte -mdynamic-no-pic stub. */
41233 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
41235 fprintf (file, "%s:\n", lazy_ptr_name);
41236 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
41237 fprintf (file, ASM_LONG "%s\n", binder_name);
41239 #endif /* TARGET_MACHO */
41241 /* Order the registers for the register allocator. */
41243 void
41244 x86_order_regs_for_local_alloc (void)
41246 int pos = 0;
41247 int i;
41249 /* First allocate the local general purpose registers. */
41250 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41251 if (GENERAL_REGNO_P (i) && call_used_regs[i])
41252 reg_alloc_order [pos++] = i;
41254 /* Global general purpose registers. */
41255 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
41256 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
41257 reg_alloc_order [pos++] = i;
41259 /* x87 registers come first in case we are doing FP math
41260 using them. */
41261 if (!TARGET_SSE_MATH)
41262 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
41263 reg_alloc_order [pos++] = i;
41265 /* SSE registers. */
41266 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
41267 reg_alloc_order [pos++] = i;
41268 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
41269 reg_alloc_order [pos++] = i;
41271 /* Extended REX SSE registers. */
41272 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
41273 reg_alloc_order [pos++] = i;
41275 /* Mask registers. */
41276 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
41277 reg_alloc_order [pos++] = i;
41279 /* x87 registers. */
41280 if (TARGET_SSE_MATH)
41281 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
41282 reg_alloc_order [pos++] = i;
41284 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
41285 reg_alloc_order [pos++] = i;
41287 /* Initialize the rest of the array, as we do not allocate some registers
41288 at all. */
41289 while (pos < FIRST_PSEUDO_REGISTER)
41290 reg_alloc_order [pos++] = 0;
41293 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
41294 in struct attribute_spec.handler. */
41295 static tree
41296 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
41297 tree args,
41298 int flags ATTRIBUTE_UNUSED,
41299 bool *no_add_attrs)
41301 if (TREE_CODE (*node) != FUNCTION_TYPE
41302 && TREE_CODE (*node) != METHOD_TYPE
41303 && TREE_CODE (*node) != FIELD_DECL
41304 && TREE_CODE (*node) != TYPE_DECL)
41306 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41307 name);
41308 *no_add_attrs = true;
41309 return NULL_TREE;
41311 if (TARGET_64BIT)
41313 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
41314 name);
41315 *no_add_attrs = true;
41316 return NULL_TREE;
41318 if (is_attribute_p ("callee_pop_aggregate_return", name))
41320 tree cst;
41322 cst = TREE_VALUE (args);
41323 if (TREE_CODE (cst) != INTEGER_CST)
41325 warning (OPT_Wattributes,
41326 "%qE attribute requires an integer constant argument",
41327 name);
41328 *no_add_attrs = true;
41330 else if (compare_tree_int (cst, 0) != 0
41331 && compare_tree_int (cst, 1) != 0)
41333 warning (OPT_Wattributes,
41334 "argument to %qE attribute is neither zero, nor one",
41335 name);
41336 *no_add_attrs = true;
41339 return NULL_TREE;
41342 return NULL_TREE;
41345 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
41346 struct attribute_spec.handler. */
41347 static tree
41348 ix86_handle_abi_attribute (tree *node, tree name,
41349 tree args ATTRIBUTE_UNUSED,
41350 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
41352 if (TREE_CODE (*node) != FUNCTION_TYPE
41353 && TREE_CODE (*node) != METHOD_TYPE
41354 && TREE_CODE (*node) != FIELD_DECL
41355 && TREE_CODE (*node) != TYPE_DECL)
41357 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41358 name);
41359 *no_add_attrs = true;
41360 return NULL_TREE;
41363 /* Can combine regparm with all attributes but fastcall. */
41364 if (is_attribute_p ("ms_abi", name))
41366 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
41368 error ("ms_abi and sysv_abi attributes are not compatible");
41371 return NULL_TREE;
41373 else if (is_attribute_p ("sysv_abi", name))
41375 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
41377 error ("ms_abi and sysv_abi attributes are not compatible");
41380 return NULL_TREE;
41383 return NULL_TREE;
41386 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
41387 struct attribute_spec.handler. */
41388 static tree
41389 ix86_handle_struct_attribute (tree *node, tree name,
41390 tree args ATTRIBUTE_UNUSED,
41391 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
41393 tree *type = NULL;
41394 if (DECL_P (*node))
41396 if (TREE_CODE (*node) == TYPE_DECL)
41397 type = &TREE_TYPE (*node);
41399 else
41400 type = node;
41402 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
41404 warning (OPT_Wattributes, "%qE attribute ignored",
41405 name);
41406 *no_add_attrs = true;
41409 else if ((is_attribute_p ("ms_struct", name)
41410 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
41411 || ((is_attribute_p ("gcc_struct", name)
41412 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
41414 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
41415 name);
41416 *no_add_attrs = true;
41419 return NULL_TREE;
41422 static tree
41423 ix86_handle_fndecl_attribute (tree *node, tree name,
41424 tree args ATTRIBUTE_UNUSED,
41425 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
41427 if (TREE_CODE (*node) != FUNCTION_DECL)
41429 warning (OPT_Wattributes, "%qE attribute only applies to functions",
41430 name);
41431 *no_add_attrs = true;
41433 return NULL_TREE;
41436 static bool
41437 ix86_ms_bitfield_layout_p (const_tree record_type)
41439 return ((TARGET_MS_BITFIELD_LAYOUT
41440 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
41441 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
41444 /* Returns an expression indicating where the this parameter is
41445 located on entry to the FUNCTION. */
41447 static rtx
41448 x86_this_parameter (tree function)
41450 tree type = TREE_TYPE (function);
41451 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
41452 int nregs;
41454 if (TARGET_64BIT)
41456 const int *parm_regs;
41458 if (ix86_function_type_abi (type) == MS_ABI)
41459 parm_regs = x86_64_ms_abi_int_parameter_registers;
41460 else
41461 parm_regs = x86_64_int_parameter_registers;
41462 return gen_rtx_REG (Pmode, parm_regs[aggr]);
41465 nregs = ix86_function_regparm (type, function);
41467 if (nregs > 0 && !stdarg_p (type))
41469 int regno;
41470 unsigned int ccvt = ix86_get_callcvt (type);
41472 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
41473 regno = aggr ? DX_REG : CX_REG;
41474 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
41476 regno = CX_REG;
41477 if (aggr)
41478 return gen_rtx_MEM (SImode,
41479 plus_constant (Pmode, stack_pointer_rtx, 4));
41481 else
41483 regno = AX_REG;
41484 if (aggr)
41486 regno = DX_REG;
41487 if (nregs == 1)
41488 return gen_rtx_MEM (SImode,
41489 plus_constant (Pmode,
41490 stack_pointer_rtx, 4));
41493 return gen_rtx_REG (SImode, regno);
41496 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
41497 aggr ? 8 : 4));
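/* Illustrative mapping (assuming the standard ABIs): for a 64-bit SysV
   method the "this" pointer is the first integer argument, so it lands in
   %rdi (parm_regs[0]); when the function returns an aggregate in memory the
   hidden return-slot pointer takes %rdi and "this" moves to %rsi
   (parm_regs[1]).  On 32-bit, thiscall normally uses %ecx, and fastcall uses
   %ecx, or %edx when the return value is an aggregate passed in memory.  */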
41500 /* Determine whether x86_output_mi_thunk can succeed. */
41502 static bool
41503 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
41504 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
41505 HOST_WIDE_INT vcall_offset, const_tree function)
41507 /* 64-bit can handle anything. */
41508 if (TARGET_64BIT)
41509 return true;
41511 /* For 32-bit, everything's fine if we have one free register. */
41512 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
41513 return true;
41515 /* Need a free register for vcall_offset. */
41516 if (vcall_offset)
41517 return false;
41519 /* Need a free register for GOT references. */
41520 if (flag_pic && !targetm.binds_local_p (function))
41521 return false;
41523 /* Otherwise ok. */
41524 return true;
41527 /* Output the assembler code for a thunk function. THUNK_DECL is the
41528 declaration for the thunk function itself, FUNCTION is the decl for
41529 the target function. DELTA is an immediate constant offset to be
41530 added to THIS. If VCALL_OFFSET is nonzero, the word at
41531 *(*this + vcall_offset) should be added to THIS. */
41533 static void
41534 x86_output_mi_thunk (FILE *file,
41535 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
41536 HOST_WIDE_INT vcall_offset, tree function)
41538 rtx this_param = x86_this_parameter (function);
41539 rtx this_reg, tmp, fnaddr;
41540 unsigned int tmp_regno;
41542 if (TARGET_64BIT)
41543 tmp_regno = R10_REG;
41544 else
41546 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
41547 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
41548 tmp_regno = AX_REG;
41549 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
41550 tmp_regno = DX_REG;
41551 else
41552 tmp_regno = CX_REG;
41555 emit_note (NOTE_INSN_PROLOGUE_END);
41557 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
41558 pull it in now and let DELTA benefit. */
41559 if (REG_P (this_param))
41560 this_reg = this_param;
41561 else if (vcall_offset)
41563 /* Put the this parameter into %eax. */
41564 this_reg = gen_rtx_REG (Pmode, AX_REG);
41565 emit_move_insn (this_reg, this_param);
41567 else
41568 this_reg = NULL_RTX;
41570 /* Adjust the this parameter by a fixed constant. */
41571 if (delta)
41573 rtx delta_rtx = GEN_INT (delta);
41574 rtx delta_dst = this_reg ? this_reg : this_param;
41576 if (TARGET_64BIT)
41578 if (!x86_64_general_operand (delta_rtx, Pmode))
41580 tmp = gen_rtx_REG (Pmode, tmp_regno);
41581 emit_move_insn (tmp, delta_rtx);
41582 delta_rtx = tmp;
41586 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
41589 /* Adjust the this parameter by a value stored in the vtable. */
41590 if (vcall_offset)
41592 rtx vcall_addr, vcall_mem, this_mem;
41594 tmp = gen_rtx_REG (Pmode, tmp_regno);
41596 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
41597 if (Pmode != ptr_mode)
41598 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
41599 emit_move_insn (tmp, this_mem);
41601 /* Adjust the this parameter. */
41602 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
41603 if (TARGET_64BIT
41604 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
41606 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
41607 emit_move_insn (tmp2, GEN_INT (vcall_offset));
41608 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
41611 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
41612 if (Pmode != ptr_mode)
41613 emit_insn (gen_addsi_1_zext (this_reg,
41614 gen_rtx_REG (ptr_mode,
41615 REGNO (this_reg)),
41616 vcall_mem));
41617 else
41618 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
41621 /* If necessary, drop THIS back to its stack slot. */
41622 if (this_reg && this_reg != this_param)
41623 emit_move_insn (this_param, this_reg);
41625 fnaddr = XEXP (DECL_RTL (function), 0);
41626 if (TARGET_64BIT)
41628 if (!flag_pic || targetm.binds_local_p (function)
41629 || TARGET_PECOFF)
41631 else
41633 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
41634 tmp = gen_rtx_CONST (Pmode, tmp);
41635 fnaddr = gen_const_mem (Pmode, tmp);
41638 else
41640 if (!flag_pic || targetm.binds_local_p (function))
41642 #if TARGET_MACHO
41643 else if (TARGET_MACHO)
41645 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
41646 fnaddr = XEXP (fnaddr, 0);
41648 #endif /* TARGET_MACHO */
41649 else
41651 tmp = gen_rtx_REG (Pmode, CX_REG);
41652 output_set_got (tmp, NULL_RTX);
41654 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
41655 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
41656 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
41657 fnaddr = gen_const_mem (Pmode, fnaddr);
41661 /* Our sibling call patterns do not allow memories, because we have no
41662 predicate that can distinguish between frame and non-frame memory.
41663 For our purposes here, we can get away with (ab)using a jump pattern,
41664 because we're going to do no optimization. */
41665 if (MEM_P (fnaddr))
41667 if (sibcall_insn_operand (fnaddr, word_mode))
41669 tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
41670 tmp = emit_call_insn (tmp);
41671 SIBLING_CALL_P (tmp) = 1;
41673 else
41674 emit_jump_insn (gen_indirect_jump (fnaddr));
41676 else
41678 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
41679 fnaddr = legitimize_pic_address (fnaddr,
41680 gen_rtx_REG (Pmode, tmp_regno));
41682 if (!sibcall_insn_operand (fnaddr, word_mode))
41684 tmp = gen_rtx_REG (word_mode, tmp_regno);
41685 if (GET_MODE (fnaddr) != word_mode)
41686 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
41687 emit_move_insn (tmp, fnaddr);
41688 fnaddr = tmp;
41691 tmp = gen_rtx_MEM (QImode, fnaddr);
41692 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
41693 tmp = emit_call_insn (tmp);
41694 SIBLING_CALL_P (tmp) = 1;
41696 emit_barrier ();
41698 /* Emit just enough of rest_of_compilation to get the insns emitted.
41699 Note that use_thunk calls assemble_start_function et al. */
41700 tmp = get_insns ();
41701 shorten_branches (tmp);
41702 final_start_function (tmp, file, 1);
41703 final (tmp, file, 1);
41704 final_end_function ();
41707 static void
41708 x86_file_start (void)
41710 default_file_start ();
41711 if (TARGET_16BIT)
41712 fputs ("\t.code16gcc\n", asm_out_file);
41713 #if TARGET_MACHO
41714 darwin_file_start ();
41715 #endif
41716 if (X86_FILE_START_VERSION_DIRECTIVE)
41717 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
41718 if (X86_FILE_START_FLTUSED)
41719 fputs ("\t.global\t__fltused\n", asm_out_file);
41720 if (ix86_asm_dialect == ASM_INTEL)
41721 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
41725 x86_field_alignment (tree field, int computed)
41727 enum machine_mode mode;
41728 tree type = TREE_TYPE (field);
41730 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
41731 return computed;
41732 mode = TYPE_MODE (strip_array_types (type));
41733 if (mode == DFmode || mode == DCmode
41734 || GET_MODE_CLASS (mode) == MODE_INT
41735 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
41736 return MIN (32, computed);
41737 return computed;
41740 /* Output assembler code to FILE to increment profiler label # LABELNO
41741 for profiling a function entry. */
41742 void
41743 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
41745 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
41746 : MCOUNT_NAME);
41748 if (TARGET_64BIT)
41750 #ifndef NO_PROFILE_COUNTERS
41751 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
41752 #endif
41754 if (!TARGET_PECOFF && flag_pic)
41755 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
41756 else
41757 fprintf (file, "\tcall\t%s\n", mcount_name);
41759 else if (flag_pic)
41761 #ifndef NO_PROFILE_COUNTERS
41762 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
41763 LPREFIX, labelno);
41764 #endif
41765 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
41767 else
41769 #ifndef NO_PROFILE_COUNTERS
41770 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
41771 LPREFIX, labelno);
41772 #endif
41773 fprintf (file, "\tcall\t%s\n", mcount_name);
41777 /* We don't have exact information about the insn sizes, but we may assume
41778 quite safely that we are informed about all 1-byte insns and memory
41779 address sizes. This is enough to eliminate unnecessary padding in
41780 99% of cases. */
41782 static int
41783 min_insn_size (rtx insn)
41785 int l = 0, len;
41787 if (!INSN_P (insn) || !active_insn_p (insn))
41788 return 0;
41790 /* Discard alignments we've emitted, and jump instructions. */
41791 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
41792 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
41793 return 0;
41795 /* Important case - calls are always 5 bytes.
41796 It is common to have many calls in a row. */
41797 if (CALL_P (insn)
41798 && symbolic_reference_mentioned_p (PATTERN (insn))
41799 && !SIBLING_CALL_P (insn))
41800 return 5;
41801 len = get_attr_length (insn);
41802 if (len <= 1)
41803 return 1;
41805 /* For normal instructions we rely on get_attr_length being exact,
41806 with a few exceptions. */
41807 if (!JUMP_P (insn))
41809 enum attr_type type = get_attr_type (insn);
41811 switch (type)
41813 case TYPE_MULTI:
41814 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
41815 || asm_noperands (PATTERN (insn)) >= 0)
41816 return 0;
41817 break;
41818 case TYPE_OTHER:
41819 case TYPE_FCMP:
41820 break;
41821 default:
41822 /* Otherwise trust get_attr_length. */
41823 return len;
41826 l = get_attr_length_address (insn);
41827 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
41828 l = 4;
41830 if (l)
41831 return 1+l;
41832 else
41833 return 2;
41836 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
41838 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
41839 16-byte window. */
41841 static void
41842 ix86_avoid_jump_mispredicts (void)
41844 rtx insn, start = get_insns ();
41845 int nbytes = 0, njumps = 0;
41846 int isjump = 0;
41848 /* Look for all minimal intervals of instructions containing 4 jumps.
41849 The intervals are bounded by START and INSN. NBYTES is the total
41850 size of the instructions in the interval, including INSN but not
41851 including START. When NBYTES is smaller than 16 bytes, it is possible
41852 that the ends of START and INSN land in the same 16-byte page.
41854 The smallest offset at which INSN can start in the page is the case where
41855 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
41856 We add a p2align to the 16-byte window with max-skip 15 - NBYTES + sizeof (INSN).
41858 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
41859 have to - control transfer to its label(s) can be performed through other
41860 means - and we also estimate the minimum length of all asm stmts as 0. */
41861 for (insn = start; insn; insn = NEXT_INSN (insn))
41863 int min_size;
41865 if (LABEL_P (insn))
41867 int align = label_to_alignment (insn);
41868 int max_skip = label_to_max_skip (insn);
41870 if (max_skip > 15)
41871 max_skip = 15;
41872 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
41873 already in the current 16 byte page, because otherwise
41874 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
41875 bytes to reach 16 byte boundary. */
41876 if (align <= 0
41877 || (align <= 3 && max_skip != (1 << align) - 1))
41878 max_skip = 0;
41879 if (dump_file)
41880 fprintf (dump_file, "Label %i with max_skip %i\n",
41881 INSN_UID (insn), max_skip);
41882 if (max_skip)
41884 while (nbytes + max_skip >= 16)
41886 start = NEXT_INSN (start);
41887 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
41888 || CALL_P (start))
41889 njumps--, isjump = 1;
41890 else
41891 isjump = 0;
41892 nbytes -= min_insn_size (start);
41895 continue;
41898 min_size = min_insn_size (insn);
41899 nbytes += min_size;
41900 if (dump_file)
41901 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
41902 INSN_UID (insn), min_size);
41903 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
41904 || CALL_P (insn))
41905 njumps++;
41906 else
41907 continue;
41909 while (njumps > 3)
41911 start = NEXT_INSN (start);
41912 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
41913 || CALL_P (start))
41914 njumps--, isjump = 1;
41915 else
41916 isjump = 0;
41917 nbytes -= min_insn_size (start);
41919 gcc_assert (njumps >= 0);
41920 if (dump_file)
41921 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
41922 INSN_UID (start), INSN_UID (insn), nbytes);
41924 if (njumps == 3 && isjump && nbytes < 16)
41926 int padsize = 15 - nbytes + min_insn_size (insn);
41928 if (dump_file)
41929 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
41930 INSN_UID (insn), padsize);
41931 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
41935 #endif
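/* Worked padding example (illustrative numbers): if the current window
   already holds 3 jumps in nbytes == 10 bytes and the incoming jump is
   estimated at 2 bytes, the pad emitted above is
       padsize = 15 - 10 + 2 = 7
   bytes, which is enough to push the fourth jump out of the 16-byte window
   containing the previous three.  */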
41937 /* AMD Athlon works faster
41938 when RET is not the destination of a conditional jump or directly preceded
41939 by another jump instruction. We avoid the penalty by inserting a NOP just
41940 before the RET instruction in such cases. */
41941 static void
41942 ix86_pad_returns (void)
41944 edge e;
41945 edge_iterator ei;
41947 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
41949 basic_block bb = e->src;
41950 rtx ret = BB_END (bb);
41951 rtx prev;
41952 bool replace = false;
41954 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
41955 || optimize_bb_for_size_p (bb))
41956 continue;
41957 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
41958 if (active_insn_p (prev) || LABEL_P (prev))
41959 break;
41960 if (prev && LABEL_P (prev))
41962 edge e;
41963 edge_iterator ei;
41965 FOR_EACH_EDGE (e, ei, bb->preds)
41966 if (EDGE_FREQUENCY (e) && e->src->index >= 0
41967 && !(e->flags & EDGE_FALLTHRU))
41969 replace = true;
41970 break;
41973 if (!replace)
41975 prev = prev_active_insn (ret);
41976 if (prev
41977 && ((JUMP_P (prev) && any_condjump_p (prev))
41978 || CALL_P (prev)))
41979 replace = true;
41980 /* Empty functions get a branch mispredict even when
41981 the jump destination is not visible to us. */
41982 if (!prev && !optimize_function_for_size_p (cfun))
41983 replace = true;
41985 if (replace)
41987 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
41988 delete_insn (ret);
41993 /* Count the minimum number of instructions in BB. Return 4 if the
41994 number of instructions >= 4. */
41996 static int
41997 ix86_count_insn_bb (basic_block bb)
41999 rtx insn;
42000 int insn_count = 0;
42002 /* Count number of instructions in this block. Return 4 if the number
42003 of instructions >= 4. */
42004 FOR_BB_INSNS (bb, insn)
42006 /* This only happens in exit blocks. */
42007 if (JUMP_P (insn)
42008 && ANY_RETURN_P (PATTERN (insn)))
42009 break;
42011 if (NONDEBUG_INSN_P (insn)
42012 && GET_CODE (PATTERN (insn)) != USE
42013 && GET_CODE (PATTERN (insn)) != CLOBBER)
42015 insn_count++;
42016 if (insn_count >= 4)
42017 return insn_count;
42021 return insn_count;
42025 /* Count the minimum number of instructions in code path in BB.
42026 Return 4 if the number of instructions >= 4. */
42028 static int
42029 ix86_count_insn (basic_block bb)
42031 edge e;
42032 edge_iterator ei;
42033 int min_prev_count;
42035 /* Only bother counting instructions along paths with no
42036 more than 2 basic blocks between entry and exit. Given
42037 that BB has an edge to exit, determine if a predecessor
42038 of BB has an edge from entry. If so, compute the number
42039 of instructions in the predecessor block. If there
42040 happen to be multiple such blocks, compute the minimum. */
42041 min_prev_count = 4;
42042 FOR_EACH_EDGE (e, ei, bb->preds)
42044 edge prev_e;
42045 edge_iterator prev_ei;
42047 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
42049 min_prev_count = 0;
42050 break;
42052 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
42054 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
42056 int count = ix86_count_insn_bb (e->src);
42057 if (count < min_prev_count)
42058 min_prev_count = count;
42059 break;
42064 if (min_prev_count < 4)
42065 min_prev_count += ix86_count_insn_bb (bb);
42067 return min_prev_count;
42070 /* Pad short function to 4 instructions. */
42072 static void
42073 ix86_pad_short_function (void)
42075 edge e;
42076 edge_iterator ei;
42078 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42080 rtx ret = BB_END (e->src);
42081 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
42083 int insn_count = ix86_count_insn (e->src);
42085 /* Pad short function. */
42086 if (insn_count < 4)
42088 rtx insn = ret;
42090 /* Find epilogue. */
42091 while (insn
42092 && (!NOTE_P (insn)
42093 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
42094 insn = PREV_INSN (insn);
42096 if (!insn)
42097 insn = ret;
42099 /* Two NOPs count as one instruction. */
42100 insn_count = 2 * (4 - insn_count);
42101 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
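/* Arithmetic note (illustrative): because two NOPs are counted as one
   instruction, a function with only 2 real instructions receives
   2 * (4 - 2) == 4 NOPs before its epilogue, which counts as 2 extra
   instructions and brings the total up to the 4-instruction minimum this
   pass enforces.  */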
42107 /* Fix up a Windows system unwinder issue. If an EH region falls through into
42108 the epilogue, the Windows system unwinder will apply epilogue logic and
42109 produce incorrect offsets. This can be avoided by adding a nop between
42110 the last insn that can throw and the first insn of the epilogue. */
42112 static void
42113 ix86_seh_fixup_eh_fallthru (void)
42115 edge e;
42116 edge_iterator ei;
42118 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
42120 rtx insn, next;
42122 /* Find the beginning of the epilogue. */
42123 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
42124 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
42125 break;
42126 if (insn == NULL)
42127 continue;
42129 /* We only care about preceding insns that can throw. */
42130 insn = prev_active_insn (insn);
42131 if (insn == NULL || !can_throw_internal (insn))
42132 continue;
42134 /* Do not separate calls from their debug information. */
42135 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
42136 if (NOTE_P (next)
42137 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
42138 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
42139 insn = next;
42140 else
42141 break;
42143 emit_insn_after (gen_nops (const1_rtx), insn);
42147 /* Implement machine-specific optimizations. We implement padding of returns
42148 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
42149 static void
42150 ix86_reorg (void)
42152 /* We are freeing block_for_insn in the toplev to keep compatibility
42153 with old MDEP_REORGS that are not CFG based. Recompute it now. */
42154 compute_bb_for_insn ();
42156 if (TARGET_SEH && current_function_has_exception_handlers ())
42157 ix86_seh_fixup_eh_fallthru ();
42159 if (optimize && optimize_function_for_speed_p (cfun))
42161 if (TARGET_PAD_SHORT_FUNCTION)
42162 ix86_pad_short_function ();
42163 else if (TARGET_PAD_RETURNS)
42164 ix86_pad_returns ();
42165 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
42166 if (TARGET_FOUR_JUMP_LIMIT)
42167 ix86_avoid_jump_mispredicts ();
42168 #endif
42172 /* Return nonzero when a QImode register that must be represented via a REX
42173 prefix is used. */
42174 bool
42175 x86_extended_QIreg_mentioned_p (rtx insn)
42177 int i;
42178 extract_insn_cached (insn);
42179 for (i = 0; i < recog_data.n_operands; i++)
42180 if (GENERAL_REG_P (recog_data.operand[i])
42181 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
42182 return true;
42183 return false;
42186 /* Return nonzero when P points to a register encoded via a REX prefix.
42187 Called via for_each_rtx. */
42188 static int
42189 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
42191 unsigned int regno;
42192 if (!REG_P (*p))
42193 return 0;
42194 regno = REGNO (*p);
42195 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
42198 /* Return true when INSN mentions a register that must be encoded using a
42199 REX prefix. */
42200 bool
42201 x86_extended_reg_mentioned_p (rtx insn)
42203 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
42204 extended_reg_mentioned_1, NULL);
42207 /* If profitable, negate (without causing overflow) integer constant
42208 of mode MODE at location LOC. Return true in this case. */
42209 bool
42210 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
42212 HOST_WIDE_INT val;
42214 if (!CONST_INT_P (*loc))
42215 return false;
42217 switch (mode)
42219 case DImode:
42220 /* DImode x86_64 constants must fit in 32 bits. */
42221 gcc_assert (x86_64_immediate_operand (*loc, mode));
42223 mode = SImode;
42224 break;
42226 case SImode:
42227 case HImode:
42228 case QImode:
42229 break;
42231 default:
42232 gcc_unreachable ();
42235 /* Avoid overflows. */
42236 if (mode_signbit_p (mode, *loc))
42237 return false;
42239 val = INTVAL (*loc);
42241 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
42242 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
42243 if ((val < 0 && val != -128)
42244 || val == 128)
42246 *loc = GEN_INT (-val);
42247 return true;
42250 return false;
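/* Illustrative note (not part of the original source): the negation above
   prefers the form whose immediate encodes in a sign-extended imm8.
     addl $-4, %eax   ->  subl $4, %eax     (cosmetic; same length)
     addl $128, %eax  ->  subl $-128, %eax  (imm32 -> imm8)
   while addl $-128, %eax is left alone, since negating it to +128 would
   force the wider immediate.  */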
42253 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
42254 optabs would emit if we didn't have TFmode patterns. */
42256 void
42257 x86_emit_floatuns (rtx operands[2])
42259 rtx neglab, donelab, i0, i1, f0, in, out;
42260 enum machine_mode mode, inmode;
42262 inmode = GET_MODE (operands[1]);
42263 gcc_assert (inmode == SImode || inmode == DImode);
42265 out = operands[0];
42266 in = force_reg (inmode, operands[1]);
42267 mode = GET_MODE (out);
42268 neglab = gen_label_rtx ();
42269 donelab = gen_label_rtx ();
42270 f0 = gen_reg_rtx (mode);
42272 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
42274 expand_float (out, in, 0);
42276 emit_jump_insn (gen_jump (donelab));
42277 emit_barrier ();
42279 emit_label (neglab);
42281 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
42282 1, OPTAB_DIRECT);
42283 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
42284 1, OPTAB_DIRECT);
42285 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
42287 expand_float (f0, i0, 0);
42289 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
42291 emit_label (donelab);
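/* Illustrative sketch (not part of the original source): a scalar C model of
   the unsigned-to-FP sequence emitted above, assuming a 64-bit input and a
   double result.  Nonnegative inputs take the signed-conversion fast path;
   inputs with the top bit set are halved with the low bit folded back in as
   a sticky bit, converted, and doubled.  */

#include <stdint.h>

static double floatuns_model (uint64_t in)
{
  if ((int64_t) in >= 0)
    return (double) (int64_t) in;		/* expand_float fast path */

  uint64_t i0 = (in >> 1) | (in & 1);		/* halve, keep sticky bit */
  double f0 = (double) (int64_t) i0;
  return f0 + f0;				/* undo the halving */
}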
42294 /* AVX512F does support 64-byte integer vector operations,
42295 thus the longest vector we are faced with is V64QImode. */
42296 #define MAX_VECT_LEN 64
42298 struct expand_vec_perm_d
42300 rtx target, op0, op1;
42301 unsigned char perm[MAX_VECT_LEN];
42302 enum machine_mode vmode;
42303 unsigned char nelt;
42304 bool one_operand_p;
42305 bool testing_p;
42308 static bool canonicalize_perm (struct expand_vec_perm_d *d);
42309 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
42310 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
42312 /* Get a vector mode of the same size as the original but with elements
42313 twice as wide. This is only guaranteed to apply to integral vectors. */
42315 static inline enum machine_mode
42316 get_mode_wider_vector (enum machine_mode o)
42318 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
42319 enum machine_mode n = GET_MODE_WIDER_MODE (o);
42320 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
42321 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
42322 return n;
42325 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
42326 fill target with val via vec_duplicate. */
42328 static bool
42329 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
42331 bool ok;
42332 rtx insn, dup;
42334 /* First attempt to recognize VAL as-is. */
42335 dup = gen_rtx_VEC_DUPLICATE (mode, val);
42336 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
42337 if (recog_memoized (insn) < 0)
42339 rtx seq;
42340 /* If that fails, force VAL into a register. */
42342 start_sequence ();
42343 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
42344 seq = get_insns ();
42345 end_sequence ();
42346 if (seq)
42347 emit_insn_before (seq, insn);
42349 ok = recog_memoized (insn) >= 0;
42350 gcc_assert (ok);
42352 return true;
42355 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
42356 with all elements equal to VAR. Return true if successful. */
42358 static bool
42359 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
42360 rtx target, rtx val)
42362 bool ok;
42364 switch (mode)
42366 case V2SImode:
42367 case V2SFmode:
42368 if (!mmx_ok)
42369 return false;
42370 /* FALLTHRU */
42372 case V4DFmode:
42373 case V4DImode:
42374 case V8SFmode:
42375 case V8SImode:
42376 case V2DFmode:
42377 case V64QImode:
42378 case V32HImode:
42379 case V2DImode:
42380 case V4SFmode:
42381 case V4SImode:
42382 case V16SImode:
42383 case V8DImode:
42384 case V16SFmode:
42385 case V8DFmode:
42386 return ix86_vector_duplicate_value (mode, target, val);
42388 case V4HImode:
42389 if (!mmx_ok)
42390 return false;
42391 if (TARGET_SSE || TARGET_3DNOW_A)
42393 rtx x;
42395 val = gen_lowpart (SImode, val);
42396 x = gen_rtx_TRUNCATE (HImode, val);
42397 x = gen_rtx_VEC_DUPLICATE (mode, x);
42398 emit_insn (gen_rtx_SET (VOIDmode, target, x));
42399 return true;
42401 goto widen;
42403 case V8QImode:
42404 if (!mmx_ok)
42405 return false;
42406 goto widen;
42408 case V8HImode:
42409 if (TARGET_AVX512VL)
42410 return ix86_vector_duplicate_value (mode, target, val);
42412 if (TARGET_SSE2)
42414 struct expand_vec_perm_d dperm;
42415 rtx tmp1, tmp2;
42417 permute:
42418 memset (&dperm, 0, sizeof (dperm));
42419 dperm.target = target;
42420 dperm.vmode = mode;
42421 dperm.nelt = GET_MODE_NUNITS (mode);
42422 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
42423 dperm.one_operand_p = true;
42425 /* Extend to SImode using a paradoxical SUBREG. */
42426 tmp1 = gen_reg_rtx (SImode);
42427 emit_move_insn (tmp1, gen_lowpart (SImode, val));
42429 /* Insert the SImode value as low element of a V4SImode vector. */
42430 tmp2 = gen_reg_rtx (V4SImode);
42431 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
42432 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
42434 ok = (expand_vec_perm_1 (&dperm)
42435 || expand_vec_perm_broadcast_1 (&dperm));
42436 gcc_assert (ok);
42437 return ok;
42439 goto widen;
42441 case V16QImode:
42442 if (TARGET_AVX512VL)
42443 return ix86_vector_duplicate_value (mode, target, val);
42445 if (TARGET_SSE2)
42446 goto permute;
42447 goto widen;
42449 widen:
42450 /* Replicate the value once into the next wider mode and recurse. */
42452 enum machine_mode smode, wsmode, wvmode;
42453 rtx x;
42455 smode = GET_MODE_INNER (mode);
42456 wvmode = get_mode_wider_vector (mode);
42457 wsmode = GET_MODE_INNER (wvmode);
42459 val = convert_modes (wsmode, smode, val, true);
42460 x = expand_simple_binop (wsmode, ASHIFT, val,
42461 GEN_INT (GET_MODE_BITSIZE (smode)),
42462 NULL_RTX, 1, OPTAB_LIB_WIDEN);
42463 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
42465 x = gen_reg_rtx (wvmode);
42466 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
42467 gcc_assert (ok);
42468 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
42469 return ok;
42472 case V16HImode:
42473 case V32QImode:
42474 if (TARGET_AVX512VL)
42475 return ix86_vector_duplicate_value (mode, target, val);
42476 else
42478 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
42479 rtx x = gen_reg_rtx (hvmode);
42481 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
42482 gcc_assert (ok);
42484 x = gen_rtx_VEC_CONCAT (mode, x, x);
42485 emit_insn (gen_rtx_SET (VOIDmode, target, x));
42487 return true;
42489 default:
42490 return false;
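/* Illustrative sketch (not part of the original source): the "widen" step
   above, modelled on scalars.  A 16-bit value is replicated into both halves
   of a 32-bit value, so broadcasting the 32-bit value into a vector of half
   as many, twice-as-wide elements yields the desired duplication.  */

#include <stdint.h>

static uint32_t widen_duplicate_model (uint16_t v)
{
  uint32_t w = v;			/* convert_modes, zero extended */
  return (w << 16) | w;			/* ASHIFT by the element size, then IOR */
}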
42494 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
42495 whose ONE_VAR element is VAR, and other elements are zero. Return true
42496 if successful. */
42498 static bool
42499 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
42500 rtx target, rtx var, int one_var)
42502 enum machine_mode vsimode;
42503 rtx new_target;
42504 rtx x, tmp;
42505 bool use_vector_set = false;
42507 switch (mode)
42509 case V2DImode:
42510 /* For SSE4.1, we normally use vector set. But if the second
42511 element is zero and inter-unit moves are OK, we use movq
42512 instead. */
42513 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
42514 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
42515 && one_var == 0));
42516 break;
42517 case V16QImode:
42518 case V4SImode:
42519 case V4SFmode:
42520 use_vector_set = TARGET_SSE4_1;
42521 break;
42522 case V8HImode:
42523 use_vector_set = TARGET_SSE2;
42524 break;
42525 case V4HImode:
42526 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
42527 break;
42528 case V32QImode:
42529 case V16HImode:
42530 case V8SImode:
42531 case V8SFmode:
42532 case V4DFmode:
42533 use_vector_set = TARGET_AVX;
42534 break;
42535 case V4DImode:
42536 /* Use ix86_expand_vector_set in 64bit mode only. */
42537 use_vector_set = TARGET_AVX && TARGET_64BIT;
42538 break;
42539 default:
42540 break;
42543 if (use_vector_set)
42545 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
42546 var = force_reg (GET_MODE_INNER (mode), var);
42547 ix86_expand_vector_set (mmx_ok, target, var, one_var);
42548 return true;
42551 switch (mode)
42553 case V2SFmode:
42554 case V2SImode:
42555 if (!mmx_ok)
42556 return false;
42557 /* FALLTHRU */
42559 case V2DFmode:
42560 case V2DImode:
42561 if (one_var != 0)
42562 return false;
42563 var = force_reg (GET_MODE_INNER (mode), var);
42564 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
42565 emit_insn (gen_rtx_SET (VOIDmode, target, x));
42566 return true;
42568 case V4SFmode:
42569 case V4SImode:
42570 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
42571 new_target = gen_reg_rtx (mode);
42572 else
42573 new_target = target;
42574 var = force_reg (GET_MODE_INNER (mode), var);
42575 x = gen_rtx_VEC_DUPLICATE (mode, var);
42576 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
42577 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
42578 if (one_var != 0)
42580 /* We need to shuffle the value to the correct position, so
42581 create a new pseudo to store the intermediate result. */
42583 /* With SSE2, we can use the integer shuffle insns. */
42584 if (mode != V4SFmode && TARGET_SSE2)
42586 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
42587 const1_rtx,
42588 GEN_INT (one_var == 1 ? 0 : 1),
42589 GEN_INT (one_var == 2 ? 0 : 1),
42590 GEN_INT (one_var == 3 ? 0 : 1)));
42591 if (target != new_target)
42592 emit_move_insn (target, new_target);
42593 return true;
42596 /* Otherwise convert the intermediate result to V4SFmode and
42597 use the SSE1 shuffle instructions. */
42598 if (mode != V4SFmode)
42600 tmp = gen_reg_rtx (V4SFmode);
42601 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
42603 else
42604 tmp = new_target;
42606 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
42607 const1_rtx,
42608 GEN_INT (one_var == 1 ? 0 : 1),
42609 GEN_INT (one_var == 2 ? 0+4 : 1+4),
42610 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
42612 if (mode != V4SFmode)
42613 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
42614 else if (tmp != target)
42615 emit_move_insn (target, tmp);
42617 else if (target != new_target)
42618 emit_move_insn (target, new_target);
42619 return true;
42621 case V8HImode:
42622 case V16QImode:
42623 vsimode = V4SImode;
42624 goto widen;
42625 case V4HImode:
42626 case V8QImode:
42627 if (!mmx_ok)
42628 return false;
42629 vsimode = V2SImode;
42630 goto widen;
42631 widen:
42632 if (one_var != 0)
42633 return false;
42635 /* Zero extend the variable element to SImode and recurse. */
42636 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
42638 x = gen_reg_rtx (vsimode);
42639 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
42640 var, one_var))
42641 gcc_unreachable ();
42643 emit_move_insn (target, gen_lowpart (mode, x));
42644 return true;
42646 default:
42647 return false;
42651 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
42652 consisting of the values in VALS. It is known that all elements
42653 except ONE_VAR are constants. Return true if successful. */
42655 static bool
42656 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
42657 rtx target, rtx vals, int one_var)
42659 rtx var = XVECEXP (vals, 0, one_var);
42660 enum machine_mode wmode;
42661 rtx const_vec, x;
42663 const_vec = copy_rtx (vals);
42664 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
42665 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
42667 switch (mode)
42669 case V2DFmode:
42670 case V2DImode:
42671 case V2SFmode:
42672 case V2SImode:
42673 /* For the two element vectors, it's just as easy to use
42674 the general case. */
42675 return false;
42677 case V4DImode:
42678 /* Use ix86_expand_vector_set in 64bit mode only. */
42679 if (!TARGET_64BIT)
42680 return false;
42681 case V4DFmode:
42682 case V8SFmode:
42683 case V8SImode:
42684 case V16HImode:
42685 case V32QImode:
42686 case V4SFmode:
42687 case V4SImode:
42688 case V8HImode:
42689 case V4HImode:
42690 break;
42692 case V16QImode:
42693 if (TARGET_SSE4_1)
42694 break;
42695 wmode = V8HImode;
42696 goto widen;
42697 case V8QImode:
42698 wmode = V4HImode;
42699 goto widen;
42700 widen:
42701 /* There's no way to set one QImode entry easily. Combine
42702 the variable value with its adjacent constant value, and
42703 promote to an HImode set. */
42704 x = XVECEXP (vals, 0, one_var ^ 1);
42705 if (one_var & 1)
42707 var = convert_modes (HImode, QImode, var, true);
42708 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
42709 NULL_RTX, 1, OPTAB_LIB_WIDEN);
42710 x = GEN_INT (INTVAL (x) & 0xff);
42712 else
42714 var = convert_modes (HImode, QImode, var, true);
42715 x = gen_int_mode (INTVAL (x) << 8, HImode);
42717 if (x != const0_rtx)
42718 var = expand_simple_binop (HImode, IOR, var, x, var,
42719 1, OPTAB_LIB_WIDEN);
42721 x = gen_reg_rtx (wmode);
42722 emit_move_insn (x, gen_lowpart (wmode, const_vec));
42723 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
42725 emit_move_insn (target, gen_lowpart (mode, x));
42726 return true;
42728 default:
42729 return false;
42732 emit_move_insn (target, const_vec);
42733 ix86_expand_vector_set (mmx_ok, target, var, one_var);
42734 return true;
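/* Illustrative sketch (not part of the original source): merging the one
   variable QImode element with its constant neighbour into a single HImode
   value, as done above, so that one HImode vector-set suffices.
   Little-endian lane order is assumed: element 2k is the low byte of
   HImode lane k.  */

#include <stdint.h>

static uint16_t combine_qi_model (uint8_t var, uint8_t neighbour, int one_var)
{
  if (one_var & 1)
    /* Variable element is the high byte of its HImode lane.  */
    return (uint16_t) ((var << 8) | neighbour);
  /* Variable element is the low byte of its HImode lane.  */
  return (uint16_t) ((neighbour << 8) | var);
}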
42737 /* A subroutine of ix86_expand_vector_init_general. Use vector
42738 concatenate to handle the most general case: all values variable,
42739 and none identical. */
42741 static void
42742 ix86_expand_vector_init_concat (enum machine_mode mode,
42743 rtx target, rtx *ops, int n)
42745 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
42746 rtx first[16], second[8], third[4];
42747 rtvec v;
42748 int i, j;
42750 switch (n)
42752 case 2:
42753 switch (mode)
42755 case V16SImode:
42756 cmode = V8SImode;
42757 break;
42758 case V16SFmode:
42759 cmode = V8SFmode;
42760 break;
42761 case V8DImode:
42762 cmode = V4DImode;
42763 break;
42764 case V8DFmode:
42765 cmode = V4DFmode;
42766 break;
42767 case V8SImode:
42768 cmode = V4SImode;
42769 break;
42770 case V8SFmode:
42771 cmode = V4SFmode;
42772 break;
42773 case V4DImode:
42774 cmode = V2DImode;
42775 break;
42776 case V4DFmode:
42777 cmode = V2DFmode;
42778 break;
42779 case V4SImode:
42780 cmode = V2SImode;
42781 break;
42782 case V4SFmode:
42783 cmode = V2SFmode;
42784 break;
42785 case V2DImode:
42786 cmode = DImode;
42787 break;
42788 case V2SImode:
42789 cmode = SImode;
42790 break;
42791 case V2DFmode:
42792 cmode = DFmode;
42793 break;
42794 case V2SFmode:
42795 cmode = SFmode;
42796 break;
42797 default:
42798 gcc_unreachable ();
42801 if (!register_operand (ops[1], cmode))
42802 ops[1] = force_reg (cmode, ops[1]);
42803 if (!register_operand (ops[0], cmode))
42804 ops[0] = force_reg (cmode, ops[0]);
42805 emit_insn (gen_rtx_SET (VOIDmode, target,
42806 gen_rtx_VEC_CONCAT (mode, ops[0],
42807 ops[1])));
42808 break;
42810 case 4:
42811 switch (mode)
42813 case V4DImode:
42814 cmode = V2DImode;
42815 break;
42816 case V4DFmode:
42817 cmode = V2DFmode;
42818 break;
42819 case V4SImode:
42820 cmode = V2SImode;
42821 break;
42822 case V4SFmode:
42823 cmode = V2SFmode;
42824 break;
42825 default:
42826 gcc_unreachable ();
42828 goto half;
42830 case 8:
42831 switch (mode)
42833 case V8DImode:
42834 cmode = V2DImode;
42835 hmode = V4DImode;
42836 break;
42837 case V8DFmode:
42838 cmode = V2DFmode;
42839 hmode = V4DFmode;
42840 break;
42841 case V8SImode:
42842 cmode = V2SImode;
42843 hmode = V4SImode;
42844 break;
42845 case V8SFmode:
42846 cmode = V2SFmode;
42847 hmode = V4SFmode;
42848 break;
42849 default:
42850 gcc_unreachable ();
42852 goto half;
42854 case 16:
42855 switch (mode)
42857 case V16SImode:
42858 cmode = V2SImode;
42859 hmode = V4SImode;
42860 gmode = V8SImode;
42861 break;
42862 case V16SFmode:
42863 cmode = V2SFmode;
42864 hmode = V4SFmode;
42865 gmode = V8SFmode;
42866 break;
42867 default:
42868 gcc_unreachable ();
42870 goto half;
42872 half:
42873 /* FIXME: We process inputs backward to help RA. PR 36222. */
42874 i = n - 1;
42875 j = (n >> 1) - 1;
42876 for (; i > 0; i -= 2, j--)
42878 first[j] = gen_reg_rtx (cmode);
42879 v = gen_rtvec (2, ops[i - 1], ops[i]);
42880 ix86_expand_vector_init (false, first[j],
42881 gen_rtx_PARALLEL (cmode, v));
42884 n >>= 1;
42885 if (n > 4)
42887 gcc_assert (hmode != VOIDmode);
42888 gcc_assert (gmode != VOIDmode);
42889 for (i = j = 0; i < n; i += 2, j++)
42891 second[j] = gen_reg_rtx (hmode);
42892 ix86_expand_vector_init_concat (hmode, second [j],
42893 &first [i], 2);
42895 n >>= 1;
42896 for (i = j = 0; i < n; i += 2, j++)
42898 third[j] = gen_reg_rtx (gmode);
42899 ix86_expand_vector_init_concat (gmode, third[j],
42900 &second[i], 2);
42902 n >>= 1;
42903 ix86_expand_vector_init_concat (mode, target, third, n);
42905 else if (n > 2)
42907 gcc_assert (hmode != VOIDmode);
42908 for (i = j = 0; i < n; i += 2, j++)
42910 second[j] = gen_reg_rtx (hmode);
42911 ix86_expand_vector_init_concat (hmode, second [j],
42912 &first [i], 2);
42914 n >>= 1;
42915 ix86_expand_vector_init_concat (mode, target, second, n);
42917 else
42918 ix86_expand_vector_init_concat (mode, target, first, n);
42919 break;
42921 default:
42922 gcc_unreachable ();
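/* Illustrative sketch (not part of the original source): the first level of
   the recursion above, on scalars.  Adjacent 32-bit inputs are paired into
   64-bit values, walking backward through the operands (see the FIXME about
   PR 36222); the resulting halves are then concatenated recursively the same
   way until the full vector is built.  */

#include <stdint.h>

static void concat_pairs_model (uint64_t *dst, const uint32_t *ops, int n)
{
  for (int i = n - 1, j = (n >> 1) - 1; i > 0; i -= 2, j--)
    /* Models VEC_CONCAT (ops[i-1], ops[i]) with ops[i-1] in the low lane.  */
    dst[j] = ((uint64_t) ops[i] << 32) | ops[i - 1];
}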
42926 /* A subroutine of ix86_expand_vector_init_general. Use vector
42927 interleave to handle the most general case: all values variable,
42928 and none identical. */
42930 static void
42931 ix86_expand_vector_init_interleave (enum machine_mode mode,
42932 rtx target, rtx *ops, int n)
42934 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
42935 int i, j;
42936 rtx op0, op1;
42937 rtx (*gen_load_even) (rtx, rtx, rtx);
42938 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
42939 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
42941 switch (mode)
42943 case V8HImode:
42944 gen_load_even = gen_vec_setv8hi;
42945 gen_interleave_first_low = gen_vec_interleave_lowv4si;
42946 gen_interleave_second_low = gen_vec_interleave_lowv2di;
42947 inner_mode = HImode;
42948 first_imode = V4SImode;
42949 second_imode = V2DImode;
42950 third_imode = VOIDmode;
42951 break;
42952 case V16QImode:
42953 gen_load_even = gen_vec_setv16qi;
42954 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
42955 gen_interleave_second_low = gen_vec_interleave_lowv4si;
42956 inner_mode = QImode;
42957 first_imode = V8HImode;
42958 second_imode = V4SImode;
42959 third_imode = V2DImode;
42960 break;
42961 default:
42962 gcc_unreachable ();
42965 for (i = 0; i < n; i++)
42967 /* Extend the odd element to SImode using a paradoxical SUBREG. */
42968 op0 = gen_reg_rtx (SImode);
42969 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
42971 /* Insert the SImode value as low element of V4SImode vector. */
42972 op1 = gen_reg_rtx (V4SImode);
42973 op0 = gen_rtx_VEC_MERGE (V4SImode,
42974 gen_rtx_VEC_DUPLICATE (V4SImode,
42975 op0),
42976 CONST0_RTX (V4SImode),
42977 const1_rtx);
42978 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
42980 /* Cast the V4SImode vector back to a vector in the original mode. */
42981 op0 = gen_reg_rtx (mode);
42982 emit_move_insn (op0, gen_lowpart (mode, op1));
42984 /* Load even elements into the second position. */
42985 emit_insn (gen_load_even (op0,
42986 force_reg (inner_mode,
42987 ops [i + i + 1]),
42988 const1_rtx));
42990 /* Cast vector to FIRST_IMODE vector. */
42991 ops[i] = gen_reg_rtx (first_imode);
42992 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
42995 /* Interleave low FIRST_IMODE vectors. */
42996 for (i = j = 0; i < n; i += 2, j++)
42998 op0 = gen_reg_rtx (first_imode);
42999 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
43001 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
43002 ops[j] = gen_reg_rtx (second_imode);
43003 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
43006 /* Interleave low SECOND_IMODE vectors. */
43007 switch (second_imode)
43009 case V4SImode:
43010 for (i = j = 0; i < n / 2; i += 2, j++)
43012 op0 = gen_reg_rtx (second_imode);
43013 emit_insn (gen_interleave_second_low (op0, ops[i],
43014 ops[i + 1]));
43016 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
43017 vector. */
43018 ops[j] = gen_reg_rtx (third_imode);
43019 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
43021 second_imode = V2DImode;
43022 gen_interleave_second_low = gen_vec_interleave_lowv2di;
43023 /* FALLTHRU */
43025 case V2DImode:
43026 op0 = gen_reg_rtx (second_imode);
43027 emit_insn (gen_interleave_second_low (op0, ops[0],
43028 ops[1]));
43030 /* Cast the SECOND_IMODE vector back to a vector in the original
43031 mode. */
43032 emit_insn (gen_rtx_SET (VOIDmode, target,
43033 gen_lowpart (mode, op0)));
43034 break;
43036 default:
43037 gcc_unreachable ();
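/* Illustrative sketch (not part of the original source): a scalar model of
   the "interleave low" building block used above (the punpckl* family).  The
   low halves of two vectors are merged element by element, so repeated
   interleaving at growing element widths assembles the final vector from the
   per-element loads.  */

#include <stdint.h>

static void interleave_low_model (uint16_t *dst, const uint16_t *a,
				  const uint16_t *b, int nelt)
{
  for (int k = 0; k < nelt / 2; k++)
    {
      dst[2 * k] = a[k];
      dst[2 * k + 1] = b[k];
    }
}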
43041 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
43042 all values variable, and none identical. */
43044 static void
43045 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
43046 rtx target, rtx vals)
43048 rtx ops[64], op0, op1, op2, op3, op4, op5;
43049 enum machine_mode half_mode = VOIDmode;
43050 enum machine_mode quarter_mode = VOIDmode;
43051 int n, i;
43053 switch (mode)
43055 case V2SFmode:
43056 case V2SImode:
43057 if (!mmx_ok && !TARGET_SSE)
43058 break;
43059 /* FALLTHRU */
43061 case V16SImode:
43062 case V16SFmode:
43063 case V8DFmode:
43064 case V8DImode:
43065 case V8SFmode:
43066 case V8SImode:
43067 case V4DFmode:
43068 case V4DImode:
43069 case V4SFmode:
43070 case V4SImode:
43071 case V2DFmode:
43072 case V2DImode:
43073 n = GET_MODE_NUNITS (mode);
43074 for (i = 0; i < n; i++)
43075 ops[i] = XVECEXP (vals, 0, i);
43076 ix86_expand_vector_init_concat (mode, target, ops, n);
43077 return;
43079 case V32QImode:
43080 half_mode = V16QImode;
43081 goto half;
43083 case V16HImode:
43084 half_mode = V8HImode;
43085 goto half;
43087 half:
43088 n = GET_MODE_NUNITS (mode);
43089 for (i = 0; i < n; i++)
43090 ops[i] = XVECEXP (vals, 0, i);
43091 op0 = gen_reg_rtx (half_mode);
43092 op1 = gen_reg_rtx (half_mode);
43093 ix86_expand_vector_init_interleave (half_mode, op0, ops,
43094 n >> 2);
43095 ix86_expand_vector_init_interleave (half_mode, op1,
43096 &ops [n >> 1], n >> 2);
43097 emit_insn (gen_rtx_SET (VOIDmode, target,
43098 gen_rtx_VEC_CONCAT (mode, op0, op1)));
43099 return;
43101 case V64QImode:
43102 quarter_mode = V16QImode;
43103 half_mode = V32QImode;
43104 goto quarter;
43106 case V32HImode:
43107 quarter_mode = V8HImode;
43108 half_mode = V16HImode;
43109 goto quarter;
43111 quarter:
43112 n = GET_MODE_NUNITS (mode);
43113 for (i = 0; i < n; i++)
43114 ops[i] = XVECEXP (vals, 0, i);
43115 op0 = gen_reg_rtx (quarter_mode);
43116 op1 = gen_reg_rtx (quarter_mode);
43117 op2 = gen_reg_rtx (quarter_mode);
43118 op3 = gen_reg_rtx (quarter_mode);
43119 op4 = gen_reg_rtx (half_mode);
43120 op5 = gen_reg_rtx (half_mode);
43121 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
43122 n >> 3);
43123 ix86_expand_vector_init_interleave (quarter_mode, op1,
43124 &ops [n >> 2], n >> 3);
43125 ix86_expand_vector_init_interleave (quarter_mode, op2,
43126 &ops [n >> 1], n >> 3);
43127 ix86_expand_vector_init_interleave (quarter_mode, op3,
43128 &ops [(n >> 1) | (n >> 2)], n >> 3);
43129 emit_insn (gen_rtx_SET (VOIDmode, op4,
43130 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
43131 emit_insn (gen_rtx_SET (VOIDmode, op5,
43132 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
43133 emit_insn (gen_rtx_SET (VOIDmode, target,
43134 gen_rtx_VEC_CONCAT (mode, op4, op5)));
43135 return;
43137 case V16QImode:
43138 if (!TARGET_SSE4_1)
43139 break;
43140 /* FALLTHRU */
43142 case V8HImode:
43143 if (!TARGET_SSE2)
43144 break;
43146 /* Don't use ix86_expand_vector_init_interleave if we can't
43147 move from GPR to SSE register directly. */
43148 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
43149 break;
43151 n = GET_MODE_NUNITS (mode);
43152 for (i = 0; i < n; i++)
43153 ops[i] = XVECEXP (vals, 0, i);
43154 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
43155 return;
43157 case V4HImode:
43158 case V8QImode:
43159 break;
43161 default:
43162 gcc_unreachable ();
43166 int i, j, n_elts, n_words, n_elt_per_word;
43167 enum machine_mode inner_mode;
43168 rtx words[4], shift;
43170 inner_mode = GET_MODE_INNER (mode);
43171 n_elts = GET_MODE_NUNITS (mode);
43172 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
43173 n_elt_per_word = n_elts / n_words;
43174 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
43176 for (i = 0; i < n_words; ++i)
43178 rtx word = NULL_RTX;
43180 for (j = 0; j < n_elt_per_word; ++j)
43182 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
43183 elt = convert_modes (word_mode, inner_mode, elt, true);
43185 if (j == 0)
43186 word = elt;
43187 else
43189 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
43190 word, 1, OPTAB_LIB_WIDEN);
43191 word = expand_simple_binop (word_mode, IOR, word, elt,
43192 word, 1, OPTAB_LIB_WIDEN);
43196 words[i] = word;
43199 if (n_words == 1)
43200 emit_move_insn (target, gen_lowpart (mode, words[0]));
43201 else if (n_words == 2)
43203 rtx tmp = gen_reg_rtx (mode);
43204 emit_clobber (tmp);
43205 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
43206 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
43207 emit_move_insn (target, tmp);
43209 else if (n_words == 4)
43211 rtx tmp = gen_reg_rtx (V4SImode);
43212 gcc_assert (word_mode == SImode);
43213 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
43214 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
43215 emit_move_insn (target, gen_lowpart (mode, tmp));
43217 else
43218 gcc_unreachable ();
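/* Illustrative sketch (not part of the original source): the general fallback
   above packs the elements of one machine word into an integer, highest lane
   first, so the lowest-numbered lane ends up in the low-order bits.  A
   4 x 16-bit -> 64-bit example, assuming little-endian lane order.  */

#include <stdint.h>

static uint64_t pack_word_model (const uint16_t elts[4])
{
  uint64_t word = 0;
  for (int j = 0; j < 4; j++)
    {
      uint16_t elt = elts[4 - j - 1];		/* highest remaining lane */
      word = (j == 0) ? elt : ((word << 16) | elt);
    }
  return word;
}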
43222 /* Initialize vector TARGET via VALS. Suppress the use of MMX
43223 instructions unless MMX_OK is true. */
43225 void
43226 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
43228 enum machine_mode mode = GET_MODE (target);
43229 enum machine_mode inner_mode = GET_MODE_INNER (mode);
43230 int n_elts = GET_MODE_NUNITS (mode);
43231 int n_var = 0, one_var = -1;
43232 bool all_same = true, all_const_zero = true;
43233 int i;
43234 rtx x;
43236 for (i = 0; i < n_elts; ++i)
43238 x = XVECEXP (vals, 0, i);
43239 if (!(CONST_INT_P (x)
43240 || GET_CODE (x) == CONST_DOUBLE
43241 || GET_CODE (x) == CONST_FIXED))
43242 n_var++, one_var = i;
43243 else if (x != CONST0_RTX (inner_mode))
43244 all_const_zero = false;
43245 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
43246 all_same = false;
43249 /* Constants are best loaded from the constant pool. */
43250 if (n_var == 0)
43252 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
43253 return;
43256 /* If all values are identical, broadcast the value. */
43257 if (all_same
43258 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
43259 XVECEXP (vals, 0, 0)))
43260 return;
43262 /* Values where only one field is non-constant are best loaded from
43263 the pool and overwritten via move later. */
43264 if (n_var == 1)
43266 if (all_const_zero
43267 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
43268 XVECEXP (vals, 0, one_var),
43269 one_var))
43270 return;
43272 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
43273 return;
43276 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
43279 void
43280 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
43282 enum machine_mode mode = GET_MODE (target);
43283 enum machine_mode inner_mode = GET_MODE_INNER (mode);
43284 enum machine_mode half_mode;
43285 bool use_vec_merge = false;
43286 rtx tmp;
43287 static rtx (*gen_extract[6][2]) (rtx, rtx)
43289 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
43290 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
43291 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
43292 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
43293 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
43294 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
43296 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
43298 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
43299 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
43300 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
43301 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
43302 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
43303 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
43305 int i, j, n;
43307 switch (mode)
43309 case V2SFmode:
43310 case V2SImode:
43311 if (mmx_ok)
43313 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
43314 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
43315 if (elt == 0)
43316 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
43317 else
43318 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
43319 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
43320 return;
43322 break;
43324 case V2DImode:
43325 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
43326 if (use_vec_merge)
43327 break;
43329 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
43330 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
43331 if (elt == 0)
43332 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
43333 else
43334 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
43335 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
43336 return;
43338 case V2DFmode:
43340 rtx op0, op1;
43342 /* For the two element vectors, we implement a VEC_CONCAT with
43343 the extraction of the other element. */
43345 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
43346 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
43348 if (elt == 0)
43349 op0 = val, op1 = tmp;
43350 else
43351 op0 = tmp, op1 = val;
43353 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
43354 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
43356 return;
43358 case V4SFmode:
43359 use_vec_merge = TARGET_SSE4_1;
43360 if (use_vec_merge)
43361 break;
43363 switch (elt)
43365 case 0:
43366 use_vec_merge = true;
43367 break;
43369 case 1:
43370 /* tmp = target = A B C D */
43371 tmp = copy_to_reg (target);
43372 /* target = A A B B */
43373 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
43374 /* target = X A B B */
43375 ix86_expand_vector_set (false, target, val, 0);
43376 /* target = A X C D */
43377 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43378 const1_rtx, const0_rtx,
43379 GEN_INT (2+4), GEN_INT (3+4)));
43380 return;
43382 case 2:
43383 /* tmp = target = A B C D */
43384 tmp = copy_to_reg (target);
43385 /* tmp = X B C D */
43386 ix86_expand_vector_set (false, tmp, val, 0);
43387 /* target = A B X D */
43388 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43389 const0_rtx, const1_rtx,
43390 GEN_INT (0+4), GEN_INT (3+4)));
43391 return;
43393 case 3:
43394 /* tmp = target = A B C D */
43395 tmp = copy_to_reg (target);
43396 /* tmp = X B C D */
43397 ix86_expand_vector_set (false, tmp, val, 0);
43398 /* target = A B C X */
43399 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
43400 const0_rtx, const1_rtx,
43401 GEN_INT (2+4), GEN_INT (0+4)));
43402 return;
43404 default:
43405 gcc_unreachable ();
43407 break;
43409 case V4SImode:
43410 use_vec_merge = TARGET_SSE4_1;
43411 if (use_vec_merge)
43412 break;
43414 /* Element 0 handled by vec_merge below. */
43415 if (elt == 0)
43417 use_vec_merge = true;
43418 break;
43421 if (TARGET_SSE2)
43423 /* With SSE2, use integer shuffles to swap element 0 and ELT,
43424 store into element 0, then shuffle them back. */
43426 rtx order[4];
43428 order[0] = GEN_INT (elt);
43429 order[1] = const1_rtx;
43430 order[2] = const2_rtx;
43431 order[3] = GEN_INT (3);
43432 order[elt] = const0_rtx;
43434 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
43435 order[1], order[2], order[3]));
43437 ix86_expand_vector_set (false, target, val, 0);
43439 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
43440 order[1], order[2], order[3]));
43442 else
43444 /* For SSE1, we have to reuse the V4SF code. */
43445 rtx t = gen_reg_rtx (V4SFmode);
43446 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
43447 emit_move_insn (target, gen_lowpart (mode, t));
43449 return;
43451 case V8HImode:
43452 use_vec_merge = TARGET_SSE2;
43453 break;
43454 case V4HImode:
43455 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
43456 break;
43458 case V16QImode:
43459 use_vec_merge = TARGET_SSE4_1;
43460 break;
43462 case V8QImode:
43463 break;
43465 case V32QImode:
43466 half_mode = V16QImode;
43467 j = 0;
43468 n = 16;
43469 goto half;
43471 case V16HImode:
43472 half_mode = V8HImode;
43473 j = 1;
43474 n = 8;
43475 goto half;
43477 case V8SImode:
43478 half_mode = V4SImode;
43479 j = 2;
43480 n = 4;
43481 goto half;
43483 case V4DImode:
43484 half_mode = V2DImode;
43485 j = 3;
43486 n = 2;
43487 goto half;
43489 case V8SFmode:
43490 half_mode = V4SFmode;
43491 j = 4;
43492 n = 4;
43493 goto half;
43495 case V4DFmode:
43496 half_mode = V2DFmode;
43497 j = 5;
43498 n = 2;
43499 goto half;
43501 half:
43502 /* Compute offset. */
43503 i = elt / n;
43504 elt %= n;
43506 gcc_assert (i <= 1);
43508 /* Extract the half. */
43509 tmp = gen_reg_rtx (half_mode);
43510 emit_insn (gen_extract[j][i] (tmp, target));
43512 /* Put val in tmp at elt. */
43513 ix86_expand_vector_set (false, tmp, val, elt);
43515 /* Put it back. */
43516 emit_insn (gen_insert[j][i] (target, target, tmp));
43517 return;
43519 case V8DFmode:
43520 tmp = gen_reg_rtx (mode);
43521 emit_insn (gen_rtx_SET (VOIDmode, tmp,
43522 gen_rtx_VEC_DUPLICATE (mode, val)));
43523 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
43524 force_reg (QImode, GEN_INT (1 << elt))));
43525 return;
43526 case V8DImode:
43527 tmp = gen_reg_rtx (mode);
43528 emit_insn (gen_rtx_SET (VOIDmode, tmp,
43529 gen_rtx_VEC_DUPLICATE (mode, val)));
43530 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
43531 force_reg (QImode, GEN_INT (1 << elt))));
43532 return;
43533 case V16SFmode:
43534 tmp = gen_reg_rtx (mode);
43535 emit_insn (gen_rtx_SET (VOIDmode, tmp,
43536 gen_rtx_VEC_DUPLICATE (mode, val)));
43537 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
43538 force_reg (HImode, GEN_INT (1 << elt))));
43539 return;
43540 case V16SImode:
43541 tmp = gen_reg_rtx (mode);
43542 emit_insn (gen_rtx_SET (VOIDmode, tmp,
43543 gen_rtx_VEC_DUPLICATE (mode, val)));
43544 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
43545 force_reg (HImode, GEN_INT (1 << elt))));
43546 return;
43547 case V32HImode:
43548 tmp = gen_reg_rtx (mode);
43549 emit_insn (gen_rtx_SET (VOIDmode, tmp,
43550 gen_rtx_VEC_DUPLICATE (mode, val)));
43551 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
43552 force_reg (SImode, GEN_INT (1 << elt))));
43553 return;
43554 case V64QImode:
43555 tmp = gen_reg_rtx (mode);
43556 emit_insn (gen_rtx_SET (VOIDmode, tmp,
43557 gen_rtx_VEC_DUPLICATE (mode, val)));
43558 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
43559 force_reg (DImode, GEN_INT (1 << elt))));
43560 return;
43562 default:
43563 break;
43566 if (use_vec_merge)
43568 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
43569 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
43570 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
43572 else
43574 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
43576 emit_move_insn (mem, target);
43578 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
43579 emit_move_insn (tmp, val);
43581 emit_move_insn (target, mem);
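/* Illustrative sketch (not part of the original source): the last-resort path
   above simply spills the vector to a stack temporary, rewrites one lane
   through memory, and reloads the whole vector.  */

#include <string.h>

static void set_lane_via_memory_model (float vec[4], float val, int elt)
{
  float mem[4];
  memcpy (mem, vec, sizeof mem);	/* emit_move_insn (mem, target) */
  mem[elt] = val;			/* store VAL at the adjusted address */
  memcpy (vec, mem, sizeof mem);	/* emit_move_insn (target, mem) */
}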
43585 void
43586 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
43588 enum machine_mode mode = GET_MODE (vec);
43589 enum machine_mode inner_mode = GET_MODE_INNER (mode);
43590 bool use_vec_extr = false;
43591 rtx tmp;
43593 switch (mode)
43595 case V2SImode:
43596 case V2SFmode:
43597 if (!mmx_ok)
43598 break;
43599 /* FALLTHRU */
43601 case V2DFmode:
43602 case V2DImode:
43603 use_vec_extr = true;
43604 break;
43606 case V4SFmode:
43607 use_vec_extr = TARGET_SSE4_1;
43608 if (use_vec_extr)
43609 break;
43611 switch (elt)
43613 case 0:
43614 tmp = vec;
43615 break;
43617 case 1:
43618 case 3:
43619 tmp = gen_reg_rtx (mode);
43620 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
43621 GEN_INT (elt), GEN_INT (elt),
43622 GEN_INT (elt+4), GEN_INT (elt+4)));
43623 break;
43625 case 2:
43626 tmp = gen_reg_rtx (mode);
43627 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
43628 break;
43630 default:
43631 gcc_unreachable ();
43633 vec = tmp;
43634 use_vec_extr = true;
43635 elt = 0;
43636 break;
43638 case V4SImode:
43639 use_vec_extr = TARGET_SSE4_1;
43640 if (use_vec_extr)
43641 break;
43643 if (TARGET_SSE2)
43645 switch (elt)
43647 case 0:
43648 tmp = vec;
43649 break;
43651 case 1:
43652 case 3:
43653 tmp = gen_reg_rtx (mode);
43654 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
43655 GEN_INT (elt), GEN_INT (elt),
43656 GEN_INT (elt), GEN_INT (elt)));
43657 break;
43659 case 2:
43660 tmp = gen_reg_rtx (mode);
43661 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
43662 break;
43664 default:
43665 gcc_unreachable ();
43667 vec = tmp;
43668 use_vec_extr = true;
43669 elt = 0;
43671 else
43673 /* For SSE1, we have to reuse the V4SF code. */
43674 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
43675 gen_lowpart (V4SFmode, vec), elt);
43676 return;
43678 break;
43680 case V8HImode:
43681 use_vec_extr = TARGET_SSE2;
43682 break;
43683 case V4HImode:
43684 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
43685 break;
43687 case V16QImode:
43688 use_vec_extr = TARGET_SSE4_1;
43689 break;
43691 case V8SFmode:
43692 if (TARGET_AVX)
43694 tmp = gen_reg_rtx (V4SFmode);
43695 if (elt < 4)
43696 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
43697 else
43698 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
43699 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43700 return;
43702 break;
43704 case V4DFmode:
43705 if (TARGET_AVX)
43707 tmp = gen_reg_rtx (V2DFmode);
43708 if (elt < 2)
43709 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
43710 else
43711 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
43712 ix86_expand_vector_extract (false, target, tmp, elt & 1);
43713 return;
43715 break;
43717 case V32QImode:
43718 if (TARGET_AVX)
43720 tmp = gen_reg_rtx (V16QImode);
43721 if (elt < 16)
43722 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
43723 else
43724 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
43725 ix86_expand_vector_extract (false, target, tmp, elt & 15);
43726 return;
43728 break;
43730 case V16HImode:
43731 if (TARGET_AVX)
43733 tmp = gen_reg_rtx (V8HImode);
43734 if (elt < 8)
43735 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
43736 else
43737 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
43738 ix86_expand_vector_extract (false, target, tmp, elt & 7);
43739 return;
43741 break;
43743 case V8SImode:
43744 if (TARGET_AVX)
43746 tmp = gen_reg_rtx (V4SImode);
43747 if (elt < 4)
43748 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
43749 else
43750 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
43751 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43752 return;
43754 break;
43756 case V4DImode:
43757 if (TARGET_AVX)
43759 tmp = gen_reg_rtx (V2DImode);
43760 if (elt < 2)
43761 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
43762 else
43763 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
43764 ix86_expand_vector_extract (false, target, tmp, elt & 1);
43765 return;
43767 break;
43769 case V32HImode:
43770 if (TARGET_AVX512BW)
43772 tmp = gen_reg_rtx (V16HImode);
43773 if (elt < 16)
43774 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
43775 else
43776 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
43777 ix86_expand_vector_extract (false, target, tmp, elt & 15);
43778 return;
43780 break;
43782 case V64QImode:
43783 if (TARGET_AVX512BW)
43785 tmp = gen_reg_rtx (V32QImode);
43786 if (elt < 32)
43787 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
43788 else
43789 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
43790 ix86_expand_vector_extract (false, target, tmp, elt & 31);
43791 return;
43793 break;
43795 case V16SFmode:
43796 tmp = gen_reg_rtx (V8SFmode);
43797 if (elt < 8)
43798 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
43799 else
43800 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
43801 ix86_expand_vector_extract (false, target, tmp, elt & 7);
43802 return;
43804 case V8DFmode:
43805 tmp = gen_reg_rtx (V4DFmode);
43806 if (elt < 4)
43807 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
43808 else
43809 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
43810 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43811 return;
43813 case V16SImode:
43814 tmp = gen_reg_rtx (V8SImode);
43815 if (elt < 8)
43816 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
43817 else
43818 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
43819 ix86_expand_vector_extract (false, target, tmp, elt & 7);
43820 return;
43822 case V8DImode:
43823 tmp = gen_reg_rtx (V4DImode);
43824 if (elt < 4)
43825 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
43826 else
43827 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
43828 ix86_expand_vector_extract (false, target, tmp, elt & 3);
43829 return;
43831 case V8QImode:
43832 /* ??? Could extract the appropriate HImode element and shift. */
43833 default:
43834 break;
43837 if (use_vec_extr)
43839 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
43840 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
43842 /* Let the rtl optimizers know about the zero extension performed. */
43843 if (inner_mode == QImode || inner_mode == HImode)
43845 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
43846 target = gen_lowpart (SImode, target);
43849 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
43851 else
43853 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
43855 emit_move_insn (mem, vec);
43857 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
43858 emit_move_insn (target, tmp);
43862 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
43863 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
43864 The upper bits of DEST are undefined, though they shouldn't cause
43865 exceptions (some bits from src or all zeros are ok). */
43867 static void
43868 emit_reduc_half (rtx dest, rtx src, int i)
43870 rtx tem, d = dest;
43871 switch (GET_MODE (src))
43873 case V4SFmode:
43874 if (i == 128)
43875 tem = gen_sse_movhlps (dest, src, src);
43876 else
43877 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
43878 GEN_INT (1 + 4), GEN_INT (1 + 4));
43879 break;
43880 case V2DFmode:
43881 tem = gen_vec_interleave_highv2df (dest, src, src);
43882 break;
43883 case V16QImode:
43884 case V8HImode:
43885 case V4SImode:
43886 case V2DImode:
43887 d = gen_reg_rtx (V1TImode);
43888 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
43889 GEN_INT (i / 2));
43890 break;
43891 case V8SFmode:
43892 if (i == 256)
43893 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
43894 else
43895 tem = gen_avx_shufps256 (dest, src, src,
43896 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
43897 break;
43898 case V4DFmode:
43899 if (i == 256)
43900 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
43901 else
43902 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
43903 break;
43904 case V32QImode:
43905 case V16HImode:
43906 case V8SImode:
43907 case V4DImode:
43908 if (i == 256)
43910 if (GET_MODE (dest) != V4DImode)
43911 d = gen_reg_rtx (V4DImode);
43912 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
43913 gen_lowpart (V4DImode, src),
43914 const1_rtx);
43916 else
43918 d = gen_reg_rtx (V2TImode);
43919 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
43920 GEN_INT (i / 2));
43922 break;
43923 case V64QImode:
43924 case V32HImode:
43925 case V16SImode:
43926 case V16SFmode:
43927 case V8DImode:
43928 case V8DFmode:
43929 if (i > 128)
43930 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
43931 gen_lowpart (V16SImode, src),
43932 gen_lowpart (V16SImode, src),
43933 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
43934 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
43935 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
43936 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
43937 GEN_INT (0xC), GEN_INT (0xD),
43938 GEN_INT (0xE), GEN_INT (0xF),
43939 GEN_INT (0x10), GEN_INT (0x11),
43940 GEN_INT (0x12), GEN_INT (0x13),
43941 GEN_INT (0x14), GEN_INT (0x15),
43942 GEN_INT (0x16), GEN_INT (0x17));
43943 else
43944 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
43945 gen_lowpart (V16SImode, src),
43946 GEN_INT (i == 128 ? 0x2 : 0x1),
43947 GEN_INT (0x3),
43948 GEN_INT (0x3),
43949 GEN_INT (0x3),
43950 GEN_INT (i == 128 ? 0x6 : 0x5),
43951 GEN_INT (0x7),
43952 GEN_INT (0x7),
43953 GEN_INT (0x7),
43954 GEN_INT (i == 128 ? 0xA : 0x9),
43955 GEN_INT (0xB),
43956 GEN_INT (0xB),
43957 GEN_INT (0xB),
43958 GEN_INT (i == 128 ? 0xE : 0xD),
43959 GEN_INT (0xF),
43960 GEN_INT (0xF),
43961 GEN_INT (0xF));
43962 break;
43963 default:
43964 gcc_unreachable ();
43966 emit_insn (tem);
43967 if (d != dest)
43968 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
43971 /* Expand a vector reduction. FN is the binary pattern to reduce;
43972 DEST is the destination; IN is the input vector. */
43974 void
43975 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
43977 rtx half, dst, vec = in;
43978 enum machine_mode mode = GET_MODE (in);
43979 int i;
43981 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
43982 if (TARGET_SSE4_1
43983 && mode == V8HImode
43984 && fn == gen_uminv8hi3)
43986 emit_insn (gen_sse4_1_phminposuw (dest, in));
43987 return;
43990 for (i = GET_MODE_BITSIZE (mode);
43991 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
43992 i >>= 1)
43994 half = gen_reg_rtx (mode);
43995 emit_reduc_half (half, vec, i);
43996 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
43997 dst = dest;
43998 else
43999 dst = gen_reg_rtx (mode);
44000 emit_insn (fn (dst, half, vec));
44001 vec = dst;
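/* Illustrative sketch (not part of the original source): the log2 reduction
   loop above, on scalars.  Each step folds the upper half of the live lanes
   onto the lower half with the binary operation; after log2(nelt) steps lane
   0 holds the reduction (the remaining lanes are don't-cares, as noted for
   emit_reduc_half).  An 8-lane max reduction is shown.  */

static int reduce_max_model (int v[8])
{
  for (int half = 4; half >= 1; half >>= 1)
    for (int k = 0; k < half; k++)
      v[k] = v[k] > v[k + half] ? v[k] : v[k + half];	/* fn (dst, half, vec) */
  return v[0];
}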
44005 /* Target hook for scalar_mode_supported_p. */
44006 static bool
44007 ix86_scalar_mode_supported_p (enum machine_mode mode)
44009 if (DECIMAL_FLOAT_MODE_P (mode))
44010 return default_decimal_float_supported_p ();
44011 else if (mode == TFmode)
44012 return true;
44013 else
44014 return default_scalar_mode_supported_p (mode);
44017 /* Implements target hook vector_mode_supported_p. */
44018 static bool
44019 ix86_vector_mode_supported_p (enum machine_mode mode)
44021 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
44022 return true;
44023 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
44024 return true;
44025 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
44026 return true;
44027 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
44028 return true;
44029 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
44030 return true;
44031 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
44032 return true;
44033 return false;
44036 /* Target hook for c_mode_for_suffix. */
44037 static enum machine_mode
44038 ix86_c_mode_for_suffix (char suffix)
44040 if (suffix == 'q')
44041 return TFmode;
44042 if (suffix == 'w')
44043 return XFmode;
44045 return VOIDmode;
44048 /* Worker function for TARGET_MD_ASM_CLOBBERS.
44050 We do this in the new i386 backend to maintain source compatibility
44051 with the old cc0-based compiler. */
44053 static tree
44054 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
44055 tree inputs ATTRIBUTE_UNUSED,
44056 tree clobbers)
44058 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
44059 clobbers);
44060 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
44061 clobbers);
44062 return clobbers;
44065 /* Implements target vector targetm.asm.encode_section_info. */
44067 static void ATTRIBUTE_UNUSED
44068 ix86_encode_section_info (tree decl, rtx rtl, int first)
44070 default_encode_section_info (decl, rtl, first);
44072 if (TREE_CODE (decl) == VAR_DECL
44073 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
44074 && ix86_in_large_data_p (decl))
44075 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
44078 /* Worker function for REVERSE_CONDITION. */
44080 enum rtx_code
44081 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
44083 return (mode != CCFPmode && mode != CCFPUmode
44084 ? reverse_condition (code)
44085 : reverse_condition_maybe_unordered (code));
44088 /* Output code to perform an x87 FP register move, from OPERANDS[1]
44089 to OPERANDS[0]. */
44091 const char *
44092 output_387_reg_move (rtx insn, rtx *operands)
44094 if (REG_P (operands[0]))
44096 if (REG_P (operands[1])
44097 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
44099 if (REGNO (operands[0]) == FIRST_STACK_REG)
44100 return output_387_ffreep (operands, 0);
44101 return "fstp\t%y0";
44103 if (STACK_TOP_P (operands[0]))
44104 return "fld%Z1\t%y1";
44105 return "fst\t%y0";
44107 else if (MEM_P (operands[0]))
44109 gcc_assert (REG_P (operands[1]));
44110 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
44111 return "fstp%Z0\t%y0";
44112 else
44114 /* There is no non-popping store to memory for XFmode.
44115 So if we need one, follow the store with a load. */
44116 if (GET_MODE (operands[0]) == XFmode)
44117 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
44118 else
44119 return "fst%Z0\t%y0";
44122 else
44123 gcc_unreachable ();
44126 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
44127 the FP status register is set. */
44129 void
44130 ix86_emit_fp_unordered_jump (rtx label)
44132 rtx reg = gen_reg_rtx (HImode);
44133 rtx temp;
44135 emit_insn (gen_x86_fnstsw_1 (reg));
44137 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
44139 emit_insn (gen_x86_sahf_1 (reg));
44141 temp = gen_rtx_REG (CCmode, FLAGS_REG);
44142 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
44144 else
44146 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
44148 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
44149 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
44152 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
44153 gen_rtx_LABEL_REF (VOIDmode, label),
44154 pc_rtx);
44155 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
44157 emit_jump_insn (temp);
44158 predict_jump (REG_BR_PROB_BASE * 10 / 100);
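/* Illustrative sketch (not part of the original source): without SAHF the
   code above tests the C2 condition flag directly in the x87 status word
   stored by fnstsw.  C2 is bit 10 of the status word, i.e. bit 2 (0x04) of
   its high byte, which is what the testqi_ext pattern checks.  */

#include <stdint.h>

static int fp_unordered_model (uint16_t fnstsw_result)
{
  return (fnstsw_result & 0x0400) != 0;		/* C2 set -> operands unordered */
}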
44161 /* Output code to perform a log1p XFmode calculation. */
44163 void ix86_emit_i387_log1p (rtx op0, rtx op1)
44165 rtx label1 = gen_label_rtx ();
44166 rtx label2 = gen_label_rtx ();
44168 rtx tmp = gen_reg_rtx (XFmode);
44169 rtx tmp2 = gen_reg_rtx (XFmode);
44170 rtx test;
44172 emit_insn (gen_absxf2 (tmp, op1));
44173 test = gen_rtx_GE (VOIDmode, tmp,
44174 CONST_DOUBLE_FROM_REAL_VALUE (
44175 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
44176 XFmode));
44177 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
44179 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
44180 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
44181 emit_jump (label2);
44183 emit_label (label1);
44184 emit_move_insn (tmp, CONST1_RTX (XFmode));
44185 emit_insn (gen_addxf3 (tmp, op1, tmp));
44186 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
44187 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
44189 emit_label (label2);
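/* Illustrative sketch (not part of the original source): a scalar model of
   the branch above.  fyl2xp1 is only used for |x| below 1 - sqrt(2)/2, where
   computing log2(1 + x) directly from x avoids cancellation; larger inputs
   fall back to fyl2x on the explicitly formed 1 + x.  Both paths scale by
   ln(2), the fldln2 constant, to obtain the natural logarithm.  */

#include <math.h>

/* Accurate log2(1 + x) for small x, standing in for fyl2xp1.  */
static double log2_1p (double x) { return log1p (x) / 0.69314718055994530942; }

static double log1p_model (double x)
{
  const double ln2 = 0.69314718055994530942;	/* fldln2 */
  if (fabs (x) < 0.29289321881345247561)	/* 1 - sqrt(2)/2 */
    return ln2 * log2_1p (x);			/* fyl2xp1 path */
  return ln2 * log2 (1.0 + x);			/* fyl2x path on 1 + x */
}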
44192 /* Emit code for round calculation. */
44193 void ix86_emit_i387_round (rtx op0, rtx op1)
44195 enum machine_mode inmode = GET_MODE (op1);
44196 enum machine_mode outmode = GET_MODE (op0);
44197 rtx e1, e2, res, tmp, tmp1, half;
44198 rtx scratch = gen_reg_rtx (HImode);
44199 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
44200 rtx jump_label = gen_label_rtx ();
44201 rtx insn;
44202 rtx (*gen_abs) (rtx, rtx);
44203 rtx (*gen_neg) (rtx, rtx);
44205 switch (inmode)
44207 case SFmode:
44208 gen_abs = gen_abssf2;
44209 break;
44210 case DFmode:
44211 gen_abs = gen_absdf2;
44212 break;
44213 case XFmode:
44214 gen_abs = gen_absxf2;
44215 break;
44216 default:
44217 gcc_unreachable ();
44220 switch (outmode)
44222 case SFmode:
44223 gen_neg = gen_negsf2;
44224 break;
44225 case DFmode:
44226 gen_neg = gen_negdf2;
44227 break;
44228 case XFmode:
44229 gen_neg = gen_negxf2;
44230 break;
44231 case HImode:
44232 gen_neg = gen_neghi2;
44233 break;
44234 case SImode:
44235 gen_neg = gen_negsi2;
44236 break;
44237 case DImode:
44238 gen_neg = gen_negdi2;
44239 break;
44240 default:
44241 gcc_unreachable ();
44244 e1 = gen_reg_rtx (inmode);
44245 e2 = gen_reg_rtx (inmode);
44246 res = gen_reg_rtx (outmode);
44248 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
44250 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
44252 /* scratch = fxam(op1) */
44253 emit_insn (gen_rtx_SET (VOIDmode, scratch,
44254 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
44255 UNSPEC_FXAM)));
44256 /* e1 = fabs(op1) */
44257 emit_insn (gen_abs (e1, op1));
44259 /* e2 = e1 + 0.5 */
44260 half = force_reg (inmode, half);
44261 emit_insn (gen_rtx_SET (VOIDmode, e2,
44262 gen_rtx_PLUS (inmode, e1, half)));
44264 /* res = floor(e2) */
44265 if (inmode != XFmode)
44267 tmp1 = gen_reg_rtx (XFmode);
44269 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
44270 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
44272 else
44273 tmp1 = e2;
44275 switch (outmode)
44277 case SFmode:
44278 case DFmode:
44280 rtx tmp0 = gen_reg_rtx (XFmode);
44282 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
44284 emit_insn (gen_rtx_SET (VOIDmode, res,
44285 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
44286 UNSPEC_TRUNC_NOOP)));
44288 break;
44289 case XFmode:
44290 emit_insn (gen_frndintxf2_floor (res, tmp1));
44291 break;
44292 case HImode:
44293 emit_insn (gen_lfloorxfhi2 (res, tmp1));
44294 break;
44295 case SImode:
44296 emit_insn (gen_lfloorxfsi2 (res, tmp1));
44297 break;
44298 case DImode:
44299 emit_insn (gen_lfloorxfdi2 (res, tmp1));
44300 break;
44301 default:
44302 gcc_unreachable ();
44305 /* flags = signbit(a) */
44306 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
44308 /* if (flags) then res = -res */
44309 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
44310 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
44311 gen_rtx_LABEL_REF (VOIDmode, jump_label),
44312 pc_rtx);
44313 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
44314 predict_jump (REG_BR_PROB_BASE * 50 / 100);
44315 JUMP_LABEL (insn) = jump_label;
44317 emit_insn (gen_neg (res, res));
44319 emit_label (jump_label);
44320 LABEL_NUSES (jump_label) = 1;
44322 emit_move_insn (op0, res);
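/* Illustrative sketch (not part of the original source): a scalar model of
   the sequence above, round(a) = sgn(a) * floor(fabs(a) + 0.5).  The emitted
   code reads the sign from the fxam result rather than from a comparison
   against zero.  */

#include <math.h>

static double round_model (double a)
{
  double r = floor (fabs (a) + 0.5);	/* e2 = fabs(a) + 0.5; res = floor(e2) */
  return signbit (a) ? -r : r;		/* conditionally negate on the sign bit */
}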
44325 /* Output code to perform a Newton-Raphson approximation of a single precision
44326 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
44328 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
44330 rtx x0, x1, e0, e1;
44332 x0 = gen_reg_rtx (mode);
44333 e0 = gen_reg_rtx (mode);
44334 e1 = gen_reg_rtx (mode);
44335 x1 = gen_reg_rtx (mode);
44337 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
44339 b = force_reg (mode, b);
44341 /* x0 = rcp(b) estimate */
44342 if (mode == V16SFmode || mode == V8DFmode)
44343 emit_insn (gen_rtx_SET (VOIDmode, x0,
44344 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
44345 UNSPEC_RCP14)));
44346 else
44347 emit_insn (gen_rtx_SET (VOIDmode, x0,
44348 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
44349 UNSPEC_RCP)));
44351 /* e0 = x0 * b */
44352 emit_insn (gen_rtx_SET (VOIDmode, e0,
44353 gen_rtx_MULT (mode, x0, b)));
44355 /* e0 = x0 * e0 */
44356 emit_insn (gen_rtx_SET (VOIDmode, e0,
44357 gen_rtx_MULT (mode, x0, e0)));
44359 /* e1 = x0 + x0 */
44360 emit_insn (gen_rtx_SET (VOIDmode, e1,
44361 gen_rtx_PLUS (mode, x0, x0)));
44363 /* x1 = e1 - e0 */
44364 emit_insn (gen_rtx_SET (VOIDmode, x1,
44365 gen_rtx_MINUS (mode, e1, e0)));
44367 /* res = a * x1 */
44368 emit_insn (gen_rtx_SET (VOIDmode, res,
44369 gen_rtx_MULT (mode, a, x1)));
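/* Illustrative sketch (not part of the original source): one Newton-Raphson
   refinement of the hardware reciprocal estimate, as emitted above.  The
   rcpss/rcp14 seed is modelled here by an ordinary division, standing in for
   the limited-precision estimate.  */

static float swdiv_model (float a, float b)
{
  float x0 = 1.0f / b;			/* rcp(b) estimate (stand-in)   */
  float e0 = x0 * b * x0;		/* e0 = x0 * b;  e0 = x0 * e0   */
  float x1 = (x0 + x0) - e0;		/* x1 = 2*x0 - b*x0*x0          */
  return a * x1;			/* res = a * x1                 */
}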
44372 /* Output code to perform a Newton-Raphson approximation of a
44373 single precision floating point [reciprocal] square root. */
44375 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
44376 bool recip)
44378 rtx x0, e0, e1, e2, e3, mthree, mhalf;
44379 REAL_VALUE_TYPE r;
44380 int unspec;
44382 x0 = gen_reg_rtx (mode);
44383 e0 = gen_reg_rtx (mode);
44384 e1 = gen_reg_rtx (mode);
44385 e2 = gen_reg_rtx (mode);
44386 e3 = gen_reg_rtx (mode);
44388 real_from_integer (&r, VOIDmode, -3, SIGNED);
44389 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
44391 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
44392 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
44393 unspec = UNSPEC_RSQRT;
44395 if (VECTOR_MODE_P (mode))
44397 mthree = ix86_build_const_vector (mode, true, mthree);
44398 mhalf = ix86_build_const_vector (mode, true, mhalf);
44399 /* There is no 512-bit rsqrt. There is however rsqrt14. */
44400 if (GET_MODE_SIZE (mode) == 64)
44401 unspec = UNSPEC_RSQRT14;
44404 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
44405 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
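/* Illustrative scalar sketch (not emitted by the compiler; approx_rsqrt
   is a hypothetical stand-in for the rsqrtps / vrsqrt14ps estimate).
   The sequence below is one Newton-Raphson step for 1/sqrt(a),
   x1 = 0.5 * x0 * (3 - a * x0 * x0), arranged so that the constants
   are the -3 and -0.5 loaded above:

     float swsqrt_sketch (float a, int recip)
     {
       float x0 = approx_rsqrt (a);
       float e2 = a * x0 * x0 - 3.0f;
       float e3 = (recip ? x0 : a * x0) * -0.5f;
       return e2 * e3;   /- recip ? ~1/sqrt(a) : ~sqrt(a) -/
     }
*/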
44407 a = force_reg (mode, a);
44409 /* x0 = rsqrt(a) estimate */
44410 emit_insn (gen_rtx_SET (VOIDmode, x0,
44411 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
44412 unspec)));
44414 /* If (a == 0.0), filter out the infinite rsqrt estimate to prevent NaN for sqrt(0.0). */
44415 if (!recip)
44417 rtx zero, mask;
44419 zero = gen_reg_rtx (mode);
44420 mask = gen_reg_rtx (mode);
44422 zero = force_reg (mode, CONST0_RTX(mode));
44424 /* Handle masked compare. */
44425 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
44427 mask = gen_reg_rtx (HImode);
44428 /* Imm value 0x4 corresponds to not-equal comparison. */
44429 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
44430 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
44432 else
44434 emit_insn (gen_rtx_SET (VOIDmode, mask,
44435 gen_rtx_NE (mode, zero, a)));
44437 emit_insn (gen_rtx_SET (VOIDmode, x0,
44438 gen_rtx_AND (mode, x0, mask)));
44442 /* e0 = x0 * a */
44443 emit_insn (gen_rtx_SET (VOIDmode, e0,
44444 gen_rtx_MULT (mode, x0, a)));
44445 /* e1 = e0 * x0 */
44446 emit_insn (gen_rtx_SET (VOIDmode, e1,
44447 gen_rtx_MULT (mode, e0, x0)));
44449 /* e2 = e1 - 3. */
44450 mthree = force_reg (mode, mthree);
44451 emit_insn (gen_rtx_SET (VOIDmode, e2,
44452 gen_rtx_PLUS (mode, e1, mthree)));
44454 mhalf = force_reg (mode, mhalf);
44455 if (recip)
44456 /* e3 = -.5 * x0 */
44457 emit_insn (gen_rtx_SET (VOIDmode, e3,
44458 gen_rtx_MULT (mode, x0, mhalf)));
44459 else
44460 /* e3 = -.5 * e0 */
44461 emit_insn (gen_rtx_SET (VOIDmode, e3,
44462 gen_rtx_MULT (mode, e0, mhalf)));
44463 /* ret = e2 * e3 */
44464 emit_insn (gen_rtx_SET (VOIDmode, res,
44465 gen_rtx_MULT (mode, e2, e3)));
44468 #ifdef TARGET_SOLARIS
44469 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
44471 static void
44472 i386_solaris_elf_named_section (const char *name, unsigned int flags,
44473 tree decl)
44475 /* With Binutils 2.15, the "@unwind" marker must be specified on
44476 every occurrence of the ".eh_frame" section, not just the first
44477 one. */
44478 if (TARGET_64BIT
44479 && strcmp (name, ".eh_frame") == 0)
44481 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
44482 flags & SECTION_WRITE ? "aw" : "a");
44483 return;
44486 #ifndef USE_GAS
44487 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
44489 solaris_elf_asm_comdat_section (name, flags, decl);
44490 return;
44492 #endif
44494 default_elf_asm_named_section (name, flags, decl);
44496 #endif /* TARGET_SOLARIS */
44498 /* Return the mangling of TYPE if it is an extended fundamental type. */
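/* For illustration: __float128 has TFmode and mangles as "g", so
   void f (__float128) becomes _Z1fg, while long double / __float80
   (XFmode) mangles as "e", giving _Z1fe.  */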
44500 static const char *
44501 ix86_mangle_type (const_tree type)
44503 type = TYPE_MAIN_VARIANT (type);
44505 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
44506 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
44507 return NULL;
44509 switch (TYPE_MODE (type))
44511 case TFmode:
44512 /* __float128 is "g". */
44513 return "g";
44514 case XFmode:
44515 /* "long double" or __float80 is "e". */
44516 return "e";
44517 default:
44518 return NULL;
44522 /* For 32-bit code we can save PIC register setup by using
44523 the __stack_chk_fail_local hidden function instead of calling
44524 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
44525 register, so it is better to call __stack_chk_fail directly. */
44527 static tree ATTRIBUTE_UNUSED
44528 ix86_stack_protect_fail (void)
44530 return TARGET_64BIT
44531 ? default_external_stack_protect_fail ()
44532 : default_hidden_stack_protect_fail ();
44535 /* Select a format to encode pointers in exception handling data. CODE
44536 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
44537 true if the symbol may be affected by dynamic relocations.
44539 ??? All x86 object file formats are capable of representing this.
44540 After all, the relocation needed is the same as for the call insn.
44541 Whether or not a particular assembler allows us to enter such, I
44542 guess we'll have to see. */
44544 asm_preferred_eh_data_format (int code, int global)
44546 if (flag_pic)
44548 int type = DW_EH_PE_sdata8;
44549 if (!TARGET_64BIT
44550 || ix86_cmodel == CM_SMALL_PIC
44551 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
44552 type = DW_EH_PE_sdata4;
44553 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
44555 if (ix86_cmodel == CM_SMALL
44556 || (ix86_cmodel == CM_MEDIUM && code))
44557 return DW_EH_PE_udata4;
44558 return DW_EH_PE_absptr;
44561 /* Expand copysign from SIGN to the positive value ABS_VALUE
44562 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
44563 the sign-bit. */
44564 static void
44565 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
44567 enum machine_mode mode = GET_MODE (sign);
44568 rtx sgn = gen_reg_rtx (mode);
44569 if (mask == NULL_RTX)
44571 enum machine_mode vmode;
44573 if (mode == SFmode)
44574 vmode = V4SFmode;
44575 else if (mode == DFmode)
44576 vmode = V2DFmode;
44577 else
44578 vmode = mode;
44580 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
44581 if (!VECTOR_MODE_P (mode))
44583 /* We need to generate a scalar mode mask in this case. */
44584 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
44585 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
44586 mask = gen_reg_rtx (mode);
44587 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
44590 else
44591 mask = gen_rtx_NOT (mode, mask);
44592 emit_insn (gen_rtx_SET (VOIDmode, sgn,
44593 gen_rtx_AND (mode, mask, sign)));
44594 emit_insn (gen_rtx_SET (VOIDmode, result,
44595 gen_rtx_IOR (mode, abs_value, sgn)));
44598 /* Expand fabs (OP0) and return a new rtx that holds the result. The
44599 mask for masking out the sign-bit is stored in *SMASK, if that is
44600 non-null. */
44601 static rtx
44602 ix86_expand_sse_fabs (rtx op0, rtx *smask)
44604 enum machine_mode vmode, mode = GET_MODE (op0);
44605 rtx xa, mask;
44607 xa = gen_reg_rtx (mode);
44608 if (mode == SFmode)
44609 vmode = V4SFmode;
44610 else if (mode == DFmode)
44611 vmode = V2DFmode;
44612 else
44613 vmode = mode;
44614 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
44615 if (!VECTOR_MODE_P (mode))
44617 /* We need to generate a scalar mode mask in this case. */
44618 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
44619 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
44620 mask = gen_reg_rtx (mode);
44621 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
44623 emit_insn (gen_rtx_SET (VOIDmode, xa,
44624 gen_rtx_AND (mode, op0, mask)));
44626 if (smask)
44627 *smask = mask;
44629 return xa;
44632 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
44633 swapping the operands if SWAP_OPERANDS is true. The expanded
44634 code is a forward jump to a newly created label in case the
44635 comparison is true. The generated label rtx is returned. */
44636 static rtx
44637 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
44638 bool swap_operands)
44640 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
44641 rtx label, tmp;
44643 if (swap_operands)
44645 tmp = op0;
44646 op0 = op1;
44647 op1 = tmp;
44650 label = gen_label_rtx ();
44651 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
44652 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44653 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
44654 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
44655 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
44656 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
44657 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
44658 JUMP_LABEL (tmp) = label;
44660 return label;
44663 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
44664 using comparison code CODE. Operands are swapped for the comparison if
44665 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
44666 static rtx
44667 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
44668 bool swap_operands)
44670 rtx (*insn)(rtx, rtx, rtx, rtx);
44671 enum machine_mode mode = GET_MODE (op0);
44672 rtx mask = gen_reg_rtx (mode);
44674 if (swap_operands)
44676 rtx tmp = op0;
44677 op0 = op1;
44678 op1 = tmp;
44681 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
44683 emit_insn (insn (mask, op0, op1,
44684 gen_rtx_fmt_ee (code, mode, op0, op1)));
44685 return mask;
44688 /* Generate and return a rtx of mode MODE for 2**n where n is the number
44689 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
44690 static rtx
44691 ix86_gen_TWO52 (enum machine_mode mode)
44693 REAL_VALUE_TYPE TWO52r;
44694 rtx TWO52;
44696 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
44697 TWO52 = const_double_from_real_value (TWO52r, mode);
44698 TWO52 = force_reg (mode, TWO52);
44700 return TWO52;
44703 /* Expand SSE sequence for computing lround from OP1 storing
44704 into OP0. */
44705 void
44706 ix86_expand_lround (rtx op0, rtx op1)
44708 /* C code for the stuff we're doing below:
44709 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
44710 return (long)tmp;
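/* An illustrative aside on why nextafter (0.5, 0.0) is used instead of an
   exact 0.5: for the largest double below 0.5, adding 0.5 would round up
   to 1.0 under round-to-nearest-even and lround would wrongly return 1.
   With the predecessor of 0.5 the sum stays strictly below 1.0:

     x = 0x1.fffffffffffffp-2          (largest double < 0.5)
     x + 0.5                        -> 1.0                    (wrong)
     x + nextafter (0.5, 0.0)       -> 0x1.fffffffffffffp-1   (truncates to 0)
*/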
44712 enum machine_mode mode = GET_MODE (op1);
44713 const struct real_format *fmt;
44714 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
44715 rtx adj;
44717 /* load nextafter (0.5, 0.0) */
44718 fmt = REAL_MODE_FORMAT (mode);
44719 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
44720 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
44722 /* adj = copysign (0.5, op1) */
44723 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
44724 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
44726 /* adj = op1 + adj */
44727 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
44729 /* op0 = (imode)adj */
44730 expand_fix (op0, adj, 0);
44733 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
44734 into OP0. */
44735 void
44736 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
44738 /* C code for the stuff we're doing below (for do_floor):
44739 xi = (long)op1;
44740 xi -= (double)xi > op1 ? 1 : 0;
44741 return xi;
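/* Worked example of the compensation (illustration only): for op1 = -2.5
   and do_floor, the truncating conversion gives xi = -2; (double) xi =
   -2.0 > -2.5, so one is subtracted and the result is -3 = floor (-2.5).
   For ceil the comparison operands are swapped (!do_floor) and one is
   added instead.  */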
44743 enum machine_mode fmode = GET_MODE (op1);
44744 enum machine_mode imode = GET_MODE (op0);
44745 rtx ireg, freg, label, tmp;
44747 /* reg = (long)op1 */
44748 ireg = gen_reg_rtx (imode);
44749 expand_fix (ireg, op1, 0);
44751 /* freg = (double)reg */
44752 freg = gen_reg_rtx (fmode);
44753 expand_float (freg, ireg, 0);
44755 /* ireg = (freg > op1) ? ireg - 1 : ireg */
44756 label = ix86_expand_sse_compare_and_jump (UNLE,
44757 freg, op1, !do_floor);
44758 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
44759 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
44760 emit_move_insn (ireg, tmp);
44762 emit_label (label);
44763 LABEL_NUSES (label) = 1;
44765 emit_move_insn (op0, ireg);
44768 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
44769 result in OPERAND0. */
44770 void
44771 ix86_expand_rint (rtx operand0, rtx operand1)
44773 /* C code for the stuff we're doing below:
44774 xa = fabs (operand1);
44775 if (!isless (xa, 2**52))
44776 return operand1;
44777 xa = xa + 2**52 - 2**52;
44778 return copysign (xa, operand1);
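/* The "xa + 2**52 - 2**52" step, for illustration: once xa < 2**52,
   adding 2**52 yields a value whose unit in the last place is 1.0, so
   the addition itself rounds xa to an integer in the current rounding
   mode; subtracting 2**52 recovers that integer:

     xa = 2.3:  2.3 + 2**52 -> 2**52 + 2,  - 2**52 -> 2.0
     xa = 2.7:  2.7 + 2**52 -> 2**52 + 3,  - 2**52 -> 3.0

   Values with xa >= 2**52 are already integral, which is why they are
   filtered out first.  */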
44780 enum machine_mode mode = GET_MODE (operand0);
44781 rtx res, xa, label, TWO52, mask;
44783 res = gen_reg_rtx (mode);
44784 emit_move_insn (res, operand1);
44786 /* xa = abs (operand1) */
44787 xa = ix86_expand_sse_fabs (res, &mask);
44789 /* if (!isless (xa, TWO52)) goto label; */
44790 TWO52 = ix86_gen_TWO52 (mode);
44791 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44793 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44794 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
44796 ix86_sse_copysign_to_positive (res, xa, res, mask);
44798 emit_label (label);
44799 LABEL_NUSES (label) = 1;
44801 emit_move_insn (operand0, res);
44804 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
44805 into OPERAND0. */
44806 void
44807 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
44809 /* C code for the stuff we expand below.
44810 double xa = fabs (x), x2;
44811 if (!isless (xa, TWO52))
44812 return x;
44813 xa = xa + TWO52 - TWO52;
44814 x2 = copysign (xa, x);
44815 Compensate. Floor:
44816 if (x2 > x)
44817 x2 -= 1;
44818 Compensate. Ceil:
44819 if (x2 < x)
44820 x2 -= -1;
44821 return x2;
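/* Example of the compensation (illustration only): for x = 2.7 and floor,
   xa + TWO52 - TWO52 rounds to 3.0; 3.0 > 2.7 selects 1.0 and
   3.0 - 1.0 = 2.0 = floor (2.7).  For x = 2.3 and ceil the intermediate
   is 2.0; 2.0 < 2.3 selects -1.0 and 2.0 - (-1.0) = 3.0 = ceil (2.3).  */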
44823 enum machine_mode mode = GET_MODE (operand0);
44824 rtx xa, TWO52, tmp, label, one, res, mask;
44826 TWO52 = ix86_gen_TWO52 (mode);
44828 /* Temporary for holding the result, initialized to the input
44829 operand to ease control flow. */
44830 res = gen_reg_rtx (mode);
44831 emit_move_insn (res, operand1);
44833 /* xa = abs (operand1) */
44834 xa = ix86_expand_sse_fabs (res, &mask);
44836 /* if (!isless (xa, TWO52)) goto label; */
44837 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44839 /* xa = xa + TWO52 - TWO52; */
44840 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44841 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
44843 /* xa = copysign (xa, operand1) */
44844 ix86_sse_copysign_to_positive (xa, xa, res, mask);
44846 /* generate 1.0 or -1.0 */
44847 one = force_reg (mode,
44848 const_double_from_real_value (do_floor
44849 ? dconst1 : dconstm1, mode));
44851 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
44852 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
44853 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44854 gen_rtx_AND (mode, one, tmp)));
44855 /* We always need to subtract here to preserve signed zero. */
44856 tmp = expand_simple_binop (mode, MINUS,
44857 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44858 emit_move_insn (res, tmp);
44860 emit_label (label);
44861 LABEL_NUSES (label) = 1;
44863 emit_move_insn (operand0, res);
44866 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
44867 into OPERAND0. */
44868 void
44869 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
44871 /* C code for the stuff we expand below.
44872 double xa = fabs (x), x2;
44873 if (!isless (xa, TWO52))
44874 return x;
44875 x2 = (double)(long)x;
44876 Compensate. Floor:
44877 if (x2 > x)
44878 x2 -= 1;
44879 Compensate. Ceil:
44880 if (x2 < x)
44881 x2 += 1;
44882 if (HONOR_SIGNED_ZEROS (mode))
44883 return copysign (x2, x);
44884 return x2;
44886 enum machine_mode mode = GET_MODE (operand0);
44887 rtx xa, xi, TWO52, tmp, label, one, res, mask;
44889 TWO52 = ix86_gen_TWO52 (mode);
44891 /* Temporary for holding the result, initialized to the input
44892 operand to ease control flow. */
44893 res = gen_reg_rtx (mode);
44894 emit_move_insn (res, operand1);
44896 /* xa = abs (operand1) */
44897 xa = ix86_expand_sse_fabs (res, &mask);
44899 /* if (!isless (xa, TWO52)) goto label; */
44900 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44902 /* xa = (double)(long)x */
44903 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
44904 expand_fix (xi, res, 0);
44905 expand_float (xa, xi, 0);
44907 /* generate 1.0 */
44908 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
44910 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
44911 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
44912 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44913 gen_rtx_AND (mode, one, tmp)));
44914 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
44915 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44916 emit_move_insn (res, tmp);
44918 if (HONOR_SIGNED_ZEROS (mode))
44919 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
44921 emit_label (label);
44922 LABEL_NUSES (label) = 1;
44924 emit_move_insn (operand0, res);
44927 /* Expand SSE sequence for computing round from OPERAND1 storing
44928 into OPERAND0. Sequence that works without relying on DImode truncation
44929 via cvttsd2siq, which is only available on 64-bit targets. */
44930 void
44931 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
44933 /* C code for the stuff we expand below.
44934 double xa = fabs (x), xa2, x2;
44935 if (!isless (xa, TWO52))
44936 return x;
44937 Using the absolute value and copying back sign makes
44938 -0.0 -> -0.0 correct.
44939 xa2 = xa + TWO52 - TWO52;
44940 Compensate.
44941 dxa = xa2 - xa;
44942 if (dxa <= -0.5)
44943 xa2 += 1;
44944 else if (dxa > 0.5)
44945 xa2 -= 1;
44946 x2 = copysign (xa2, x);
44947 return x2;
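/* Illustration of the halfway handling: for x = 2.5 the biased sum
   xa + TWO52 - TWO52 rounds to even, so xa2 = 2.0 and dxa = -0.5;
   dxa <= -0.5 adds one, giving 3.0, which matches round()'s
   halfway-away-from-zero rule.  For x = 3.5 the biased sum already
   rounds up (xa2 = 4.0, dxa = 0.5), so no adjustment is needed.  */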
44949 enum machine_mode mode = GET_MODE (operand0);
44950 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
44952 TWO52 = ix86_gen_TWO52 (mode);
44954 /* Temporary for holding the result, initialized to the input
44955 operand to ease control flow. */
44956 res = gen_reg_rtx (mode);
44957 emit_move_insn (res, operand1);
44959 /* xa = abs (operand1) */
44960 xa = ix86_expand_sse_fabs (res, &mask);
44962 /* if (!isless (xa, TWO52)) goto label; */
44963 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
44965 /* xa2 = xa + TWO52 - TWO52; */
44966 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
44967 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
44969 /* dxa = xa2 - xa; */
44970 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
44972 /* generate 0.5, 1.0 and -0.5 */
44973 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
44974 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
44975 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
44976 0, OPTAB_DIRECT);
44978 /* Compensate. */
44979 tmp = gen_reg_rtx (mode);
44980 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
44981 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
44982 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44983 gen_rtx_AND (mode, one, tmp)));
44984 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44985 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
44986 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
44987 emit_insn (gen_rtx_SET (VOIDmode, tmp,
44988 gen_rtx_AND (mode, one, tmp)));
44989 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
44991 /* res = copysign (xa2, operand1) */
44992 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
44994 emit_label (label);
44995 LABEL_NUSES (label) = 1;
44997 emit_move_insn (operand0, res);
45000 /* Expand SSE sequence for computing trunc from OPERAND1 storing
45001 into OPERAND0. */
45002 void
45003 ix86_expand_trunc (rtx operand0, rtx operand1)
45005 /* C code for SSE variant we expand below.
45006 double xa = fabs (x), x2;
45007 if (!isless (xa, TWO52))
45008 return x;
45009 x2 = (double)(long)x;
45010 if (HONOR_SIGNED_ZEROS (mode))
45011 return copysign (x2, x);
45012 return x2;
45014 enum machine_mode mode = GET_MODE (operand0);
45015 rtx xa, xi, TWO52, label, res, mask;
45017 TWO52 = ix86_gen_TWO52 (mode);
45019 /* Temporary for holding the result, initialized to the input
45020 operand to ease control flow. */
45021 res = gen_reg_rtx (mode);
45022 emit_move_insn (res, operand1);
45024 /* xa = abs (operand1) */
45025 xa = ix86_expand_sse_fabs (res, &mask);
45027 /* if (!isless (xa, TWO52)) goto label; */
45028 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45030 /* x = (double)(long)x */
45031 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45032 expand_fix (xi, res, 0);
45033 expand_float (res, xi, 0);
45035 if (HONOR_SIGNED_ZEROS (mode))
45036 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
45038 emit_label (label);
45039 LABEL_NUSES (label) = 1;
45041 emit_move_insn (operand0, res);
45044 /* Expand SSE sequence for computing trunc from OPERAND1 storing
45045 into OPERAND0. */
45046 void
45047 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
45049 enum machine_mode mode = GET_MODE (operand0);
45050 rtx xa, mask, TWO52, label, one, res, smask, tmp;
45052 /* C code for SSE variant we expand below.
45053 double xa = fabs (x), x2;
45054 if (!isless (xa, TWO52))
45055 return x;
45056 xa2 = xa + TWO52 - TWO52;
45057 Compensate:
45058 if (xa2 > xa)
45059 xa2 -= 1.0;
45060 x2 = copysign (xa2, x);
45061 return x2;
45064 TWO52 = ix86_gen_TWO52 (mode);
45066 /* Temporary for holding the result, initialized to the input
45067 operand to ease control flow. */
45068 res = gen_reg_rtx (mode);
45069 emit_move_insn (res, operand1);
45071 /* xa = abs (operand1) */
45072 xa = ix86_expand_sse_fabs (res, &smask);
45074 /* if (!isless (xa, TWO52)) goto label; */
45075 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45077 /* res = xa + TWO52 - TWO52; */
45078 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
45079 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
45080 emit_move_insn (res, tmp);
45082 /* generate 1.0 */
45083 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
45085 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
45086 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
45087 emit_insn (gen_rtx_SET (VOIDmode, mask,
45088 gen_rtx_AND (mode, mask, one)));
45089 tmp = expand_simple_binop (mode, MINUS,
45090 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
45091 emit_move_insn (res, tmp);
45093 /* res = copysign (res, operand1) */
45094 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
45096 emit_label (label);
45097 LABEL_NUSES (label) = 1;
45099 emit_move_insn (operand0, res);
45102 /* Expand SSE sequence for computing round from OPERAND1 storing
45103 into OPERAND0. */
45104 void
45105 ix86_expand_round (rtx operand0, rtx operand1)
45107 /* C code for the stuff we're doing below:
45108 double xa = fabs (x);
45109 if (!isless (xa, TWO52))
45110 return x;
45111 xa = (double)(long)(xa + nextafter (0.5, 0.0));
45112 return copysign (xa, x);
45114 enum machine_mode mode = GET_MODE (operand0);
45115 rtx res, TWO52, xa, label, xi, half, mask;
45116 const struct real_format *fmt;
45117 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45119 /* Temporary for holding the result, initialized to the input
45120 operand to ease control flow. */
45121 res = gen_reg_rtx (mode);
45122 emit_move_insn (res, operand1);
45124 TWO52 = ix86_gen_TWO52 (mode);
45125 xa = ix86_expand_sse_fabs (res, &mask);
45126 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
45128 /* load nextafter (0.5, 0.0) */
45129 fmt = REAL_MODE_FORMAT (mode);
45130 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45131 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
45133 /* xa = xa + 0.5 */
45134 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
45135 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
45137 /* xa = (double)(int64_t)xa */
45138 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
45139 expand_fix (xi, xa, 0);
45140 expand_float (xa, xi, 0);
45142 /* res = copysign (xa, operand1) */
45143 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
45145 emit_label (label);
45146 LABEL_NUSES (label) = 1;
45148 emit_move_insn (operand0, res);
45151 /* Expand SSE sequence for computing round
45152 from OP1 storing into OP0 using sse4 round insn. */
45153 void
45154 ix86_expand_round_sse4 (rtx op0, rtx op1)
45156 enum machine_mode mode = GET_MODE (op0);
45157 rtx e1, e2, res, half;
45158 const struct real_format *fmt;
45159 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
45160 rtx (*gen_copysign) (rtx, rtx, rtx);
45161 rtx (*gen_round) (rtx, rtx, rtx);
45163 switch (mode)
45165 case SFmode:
45166 gen_copysign = gen_copysignsf3;
45167 gen_round = gen_sse4_1_roundsf2;
45168 break;
45169 case DFmode:
45170 gen_copysign = gen_copysigndf3;
45171 gen_round = gen_sse4_1_rounddf2;
45172 break;
45173 default:
45174 gcc_unreachable ();
45177 /* round (a) = trunc (a + copysign (0.5, a)) */
45179 /* load nextafter (0.5, 0.0) */
45180 fmt = REAL_MODE_FORMAT (mode);
45181 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
45182 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
45183 half = const_double_from_real_value (pred_half, mode);
45185 /* e1 = copysign (0.5, op1) */
45186 e1 = gen_reg_rtx (mode);
45187 emit_insn (gen_copysign (e1, half, op1));
45189 /* e2 = op1 + e1 */
45190 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
45192 /* res = trunc (e2) */
45193 res = gen_reg_rtx (mode);
45194 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
45196 emit_move_insn (op0, res);
45200 /* Table of valid machine attributes. */
45201 static const struct attribute_spec ix86_attribute_table[] =
45203 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
45204 affects_type_identity } */
45205 /* Stdcall attribute says callee is responsible for popping arguments
45206 if they are not variable. */
45207 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
45208 true },
45209 /* Fastcall attribute says callee is responsible for popping arguments
45210 if they are not variable. */
45211 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
45212 true },
45213 /* Thiscall attribute says callee is responsible for popping arguments
45214 if they are not variable. */
45215 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
45216 true },
45217 /* Cdecl attribute says the callee is a normal C declaration */
45218 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
45219 true },
45220 /* Regparm attribute specifies how many integer arguments are to be
45221 passed in registers. */
45222 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
45223 true },
45224 /* Sseregparm attribute says we are using x86_64 calling conventions
45225 for FP arguments. */
45226 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
45227 true },
45228 /* The transactional memory builtins are implicitly regparm or fastcall
45229 depending on the ABI. Override the generic do-nothing attribute that
45230 these builtins were declared with. */
45231 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
45232 true },
45233 /* force_align_arg_pointer says this function realigns the stack at entry. */
45234 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
45235 false, true, true, ix86_handle_cconv_attribute, false },
45236 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
45237 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
45238 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
45239 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
45240 false },
45241 #endif
45242 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
45243 false },
45244 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
45245 false },
45246 #ifdef SUBTARGET_ATTRIBUTE_TABLE
45247 SUBTARGET_ATTRIBUTE_TABLE,
45248 #endif
45249 /* ms_abi and sysv_abi calling convention function attributes. */
45250 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
45251 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
45252 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
45253 false },
45254 { "callee_pop_aggregate_return", 1, 1, false, true, true,
45255 ix86_handle_callee_pop_aggregate_return, true },
45256 /* End element. */
45257 { NULL, 0, 0, false, false, false, NULL, false }
45260 /* Implement targetm.vectorize.builtin_vectorization_cost. */
45261 static int
45262 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
45263 tree vectype,
45264 int misalign ATTRIBUTE_UNUSED)
45266 unsigned elements;
45268 switch (type_of_cost)
45270 case scalar_stmt:
45271 return ix86_cost->scalar_stmt_cost;
45273 case scalar_load:
45274 return ix86_cost->scalar_load_cost;
45276 case scalar_store:
45277 return ix86_cost->scalar_store_cost;
45279 case vector_stmt:
45280 return ix86_cost->vec_stmt_cost;
45282 case vector_load:
45283 return ix86_cost->vec_align_load_cost;
45285 case vector_store:
45286 return ix86_cost->vec_store_cost;
45288 case vec_to_scalar:
45289 return ix86_cost->vec_to_scalar_cost;
45291 case scalar_to_vec:
45292 return ix86_cost->scalar_to_vec_cost;
45294 case unaligned_load:
45295 case unaligned_store:
45296 return ix86_cost->vec_unalign_load_cost;
45298 case cond_branch_taken:
45299 return ix86_cost->cond_taken_branch_cost;
45301 case cond_branch_not_taken:
45302 return ix86_cost->cond_not_taken_branch_cost;
45304 case vec_perm:
45305 case vec_promote_demote:
45306 return ix86_cost->vec_stmt_cost;
45308 case vec_construct:
45309 elements = TYPE_VECTOR_SUBPARTS (vectype);
45310 return elements / 2 + 1;
45312 default:
45313 gcc_unreachable ();
45317 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
45318 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
45319 insn every time. */
45321 static GTY(()) rtx vselect_insn;
45323 /* Initialize vselect_insn. */
45325 static void
45326 init_vselect_insn (void)
45328 unsigned i;
45329 rtx x;
45331 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
45332 for (i = 0; i < MAX_VECT_LEN; ++i)
45333 XVECEXP (x, 0, i) = const0_rtx;
45334 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
45335 const0_rtx), x);
45336 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
45337 start_sequence ();
45338 vselect_insn = emit_insn (x);
45339 end_sequence ();
45342 /* Construct (set target (vec_select op0 (parallel perm))) and
45343 return true if that's a valid instruction in the active ISA. */
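/* For illustration, a V4SImode permutation {2, 3, 0, 1} is tried as

     (set (reg:V4SI target)
          (vec_select:V4SI (reg:V4SI op0)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   which, with SSE2 enabled, recog_memoized can match against the pshufd
   pattern in sse.md; if nothing matches, the caller falls back to other
   strategies.  */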
45345 static bool
45346 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
45347 unsigned nelt, bool testing_p)
45349 unsigned int i;
45350 rtx x, save_vconcat;
45351 int icode;
45353 if (vselect_insn == NULL_RTX)
45354 init_vselect_insn ();
45356 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
45357 PUT_NUM_ELEM (XVEC (x, 0), nelt);
45358 for (i = 0; i < nelt; ++i)
45359 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
45360 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
45361 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
45362 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
45363 SET_DEST (PATTERN (vselect_insn)) = target;
45364 icode = recog_memoized (vselect_insn);
45366 if (icode >= 0 && !testing_p)
45367 emit_insn (copy_rtx (PATTERN (vselect_insn)));
45369 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
45370 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
45371 INSN_CODE (vselect_insn) = -1;
45373 return icode >= 0;
45376 /* Similar, but generate a vec_concat from op0 and op1 as well. */
45378 static bool
45379 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
45380 const unsigned char *perm, unsigned nelt,
45381 bool testing_p)
45383 enum machine_mode v2mode;
45384 rtx x;
45385 bool ok;
45387 if (vselect_insn == NULL_RTX)
45388 init_vselect_insn ();
45390 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
45391 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
45392 PUT_MODE (x, v2mode);
45393 XEXP (x, 0) = op0;
45394 XEXP (x, 1) = op1;
45395 ok = expand_vselect (target, x, perm, nelt, testing_p);
45396 XEXP (x, 0) = const0_rtx;
45397 XEXP (x, 1) = const0_rtx;
45398 return ok;
45401 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
45402 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
45404 static bool
45405 expand_vec_perm_blend (struct expand_vec_perm_d *d)
45407 enum machine_mode vmode = d->vmode;
45408 unsigned i, mask, nelt = d->nelt;
45409 rtx target, op0, op1, x;
45410 rtx rperm[32], vperm;
45412 if (d->one_operand_p)
45413 return false;
45414 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64 &&
45415 GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
45417 else if (TARGET_AVX512VL)
45419 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
45421 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
45423 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
45425 else
45426 return false;
45428 /* This is a blend, not a permute. Elements must stay in their
45429 respective lanes. */
45430 for (i = 0; i < nelt; ++i)
45432 unsigned e = d->perm[i];
45433 if (!(e == i || e == i + nelt))
45434 return false;
45437 if (d->testing_p)
45438 return true;
45440 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
45441 decision should be extracted elsewhere, so that we only try that
45442 sequence once all budget==3 options have been tried. */
45443 target = d->target;
45444 op0 = d->op0;
45445 op1 = d->op1;
45446 mask = 0;
45448 switch (vmode)
45450 case V8DFmode:
45451 case V16SFmode:
45452 case V4DFmode:
45453 case V8SFmode:
45454 case V2DFmode:
45455 case V4SFmode:
45456 case V8HImode:
45457 case V8SImode:
45458 case V32HImode:
45459 case V64QImode:
45460 case V16SImode:
45461 case V8DImode:
45462 for (i = 0; i < nelt; ++i)
45463 mask |= (d->perm[i] >= nelt) << i;
45464 break;
45466 case V2DImode:
45467 for (i = 0; i < 2; ++i)
45468 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
45469 vmode = V8HImode;
45470 goto do_subreg;
45472 case V4SImode:
45473 for (i = 0; i < 4; ++i)
45474 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
45475 vmode = V8HImode;
45476 goto do_subreg;
45478 case V16QImode:
45479 /* See if bytes move in pairs so we can use pblendw with
45480 an immediate argument, rather than pblendvb with a vector
45481 argument. */
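/* For instance (illustration only): with nelt == 16, the blend selector
   {0,1, 18,19, 4,5, 22,23, 8,9, 26,27, 12,13, 30,31} takes both bytes
   of every word from the same operand, so it can be done as a V8HImode
   pblendw with immediate 0xaa; a selector such as {0, 17, ...} mixes
   operands within a word and needs pblendvb with a constant mask
   vector instead.  */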
45482 for (i = 0; i < 16; i += 2)
45483 if (d->perm[i] + 1 != d->perm[i + 1])
45485 use_pblendvb:
45486 for (i = 0; i < nelt; ++i)
45487 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
45489 finish_pblendvb:
45490 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
45491 vperm = force_reg (vmode, vperm);
45493 if (GET_MODE_SIZE (vmode) == 16)
45494 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
45495 else
45496 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
45497 if (target != d->target)
45498 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45499 return true;
45502 for (i = 0; i < 8; ++i)
45503 mask |= (d->perm[i * 2] >= 16) << i;
45504 vmode = V8HImode;
45505 /* FALLTHRU */
45507 do_subreg:
45508 target = gen_reg_rtx (vmode);
45509 op0 = gen_lowpart (vmode, op0);
45510 op1 = gen_lowpart (vmode, op1);
45511 break;
45513 case V32QImode:
45514 /* See if bytes move in pairs. If not, vpblendvb must be used. */
45515 for (i = 0; i < 32; i += 2)
45516 if (d->perm[i] + 1 != d->perm[i + 1])
45517 goto use_pblendvb;
45518 /* See if bytes move in quadruplets. If yes, vpblendd
45519 with immediate can be used. */
45520 for (i = 0; i < 32; i += 4)
45521 if (d->perm[i] + 2 != d->perm[i + 2])
45522 break;
45523 if (i < 32)
45525 /* See if bytes move the same in both lanes. If yes,
45526 vpblendw with immediate can be used. */
45527 for (i = 0; i < 16; i += 2)
45528 if (d->perm[i] + 16 != d->perm[i + 16])
45529 goto use_pblendvb;
45531 /* Use vpblendw. */
45532 for (i = 0; i < 16; ++i)
45533 mask |= (d->perm[i * 2] >= 32) << i;
45534 vmode = V16HImode;
45535 goto do_subreg;
45538 /* Use vpblendd. */
45539 for (i = 0; i < 8; ++i)
45540 mask |= (d->perm[i * 4] >= 32) << i;
45541 vmode = V8SImode;
45542 goto do_subreg;
45544 case V16HImode:
45545 /* See if words move in pairs. If yes, vpblendd can be used. */
45546 for (i = 0; i < 16; i += 2)
45547 if (d->perm[i] + 1 != d->perm[i + 1])
45548 break;
45549 if (i < 16)
45551 /* See if words move the same in both lanes. If not,
45552 vpblendvb must be used. */
45553 for (i = 0; i < 8; i++)
45554 if (d->perm[i] + 8 != d->perm[i + 8])
45556 /* Use vpblendvb. */
45557 for (i = 0; i < 32; ++i)
45558 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
45560 vmode = V32QImode;
45561 nelt = 32;
45562 target = gen_reg_rtx (vmode);
45563 op0 = gen_lowpart (vmode, op0);
45564 op1 = gen_lowpart (vmode, op1);
45565 goto finish_pblendvb;
45568 /* Use vpblendw. */
45569 for (i = 0; i < 16; ++i)
45570 mask |= (d->perm[i] >= 16) << i;
45571 break;
45574 /* Use vpblendd. */
45575 for (i = 0; i < 8; ++i)
45576 mask |= (d->perm[i * 2] >= 16) << i;
45577 vmode = V8SImode;
45578 goto do_subreg;
45580 case V4DImode:
45581 /* Use vpblendd. */
45582 for (i = 0; i < 4; ++i)
45583 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
45584 vmode = V8SImode;
45585 goto do_subreg;
45587 default:
45588 gcc_unreachable ();
45591 /* This matches five different patterns with the different modes. */
45592 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
45593 x = gen_rtx_SET (VOIDmode, target, x);
45594 emit_insn (x);
45595 if (target != d->target)
45596 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45598 return true;
45601 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
45602 in terms of the variable form of vpermilps.
45604 Note that we will have already failed the immediate input vpermilps,
45605 which requires that the high and low part shuffle be identical; the
45606 variable form doesn't require that. */
45608 static bool
45609 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
45611 rtx rperm[8], vperm;
45612 unsigned i;
45614 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
45615 return false;
45617 /* We can only permute within the 128-bit lane. */
45618 for (i = 0; i < 8; ++i)
45620 unsigned e = d->perm[i];
45621 if (i < 4 ? e >= 4 : e < 4)
45622 return false;
45625 if (d->testing_p)
45626 return true;
45628 for (i = 0; i < 8; ++i)
45630 unsigned e = d->perm[i];
45632 /* Within each 128-bit lane, the elements of op0 are numbered
45633 from 0 and the elements of op1 are numbered from 4. */
45634 if (e >= 8 + 4)
45635 e -= 8;
45636 else if (e >= 4)
45637 e -= 4;
45639 rperm[i] = GEN_INT (e);
45642 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
45643 vperm = force_reg (V8SImode, vperm);
45644 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
45646 return true;
45649 /* Return true if permutation D can be performed as a VMODE permutation
45650 instead. */
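/* For example (illustration): a V16QImode permutation that moves bytes
   only in aligned groups of four, such as
   {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11}, can equally be done as
   the V4SImode permutation {1, 0, 3, 2}; a selector that splits such a
   group, e.g. one starting {1,2,3,4, ...}, cannot.  */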
45652 static bool
45653 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
45655 unsigned int i, j, chunk;
45657 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
45658 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
45659 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
45660 return false;
45662 if (GET_MODE_NUNITS (vmode) >= d->nelt)
45663 return true;
45665 chunk = d->nelt / GET_MODE_NUNITS (vmode);
45666 for (i = 0; i < d->nelt; i += chunk)
45667 if (d->perm[i] & (chunk - 1))
45668 return false;
45669 else
45670 for (j = 1; j < chunk; ++j)
45671 if (d->perm[i] + j != d->perm[i + j])
45672 return false;
45674 return true;
45677 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
45678 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
45680 static bool
45681 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
45683 unsigned i, nelt, eltsz, mask;
45684 unsigned char perm[64];
45685 enum machine_mode vmode = V16QImode;
45686 rtx rperm[64], vperm, target, op0, op1;
45688 nelt = d->nelt;
45690 if (!d->one_operand_p)
45692 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
45694 if (TARGET_AVX2
45695 && valid_perm_using_mode_p (V2TImode, d))
45697 if (d->testing_p)
45698 return true;
45700 /* Use vperm2i128 insn. The pattern uses
45701 V4DImode instead of V2TImode. */
45702 target = d->target;
45703 if (d->vmode != V4DImode)
45704 target = gen_reg_rtx (V4DImode);
45705 op0 = gen_lowpart (V4DImode, d->op0);
45706 op1 = gen_lowpart (V4DImode, d->op1);
45707 rperm[0]
45708 = GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
45709 || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
45710 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
45711 if (target != d->target)
45712 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45713 return true;
45715 return false;
45718 else
45720 if (GET_MODE_SIZE (d->vmode) == 16)
45722 if (!TARGET_SSSE3)
45723 return false;
45725 else if (GET_MODE_SIZE (d->vmode) == 32)
45727 if (!TARGET_AVX2)
45728 return false;
45730 /* V4DImode should be already handled through
45731 expand_vselect by vpermq instruction. */
45732 gcc_assert (d->vmode != V4DImode);
45734 vmode = V32QImode;
45735 if (d->vmode == V8SImode
45736 || d->vmode == V16HImode
45737 || d->vmode == V32QImode)
45739 /* First see if vpermq can be used for
45740 V8SImode/V16HImode/V32QImode. */
45741 if (valid_perm_using_mode_p (V4DImode, d))
45743 for (i = 0; i < 4; i++)
45744 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
45745 if (d->testing_p)
45746 return true;
45747 target = gen_reg_rtx (V4DImode);
45748 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
45749 perm, 4, false))
45751 emit_move_insn (d->target,
45752 gen_lowpart (d->vmode, target));
45753 return true;
45755 return false;
45758 /* Next see if vpermd can be used. */
45759 if (valid_perm_using_mode_p (V8SImode, d))
45760 vmode = V8SImode;
45762 /* Or if vpermps can be used. */
45763 else if (d->vmode == V8SFmode)
45764 vmode = V8SImode;
45766 if (vmode == V32QImode)
45768 /* vpshufb only works within lanes; it is not
45769 possible to shuffle bytes between lanes. */
45770 for (i = 0; i < nelt; ++i)
45771 if ((d->perm[i] ^ i) & (nelt / 2))
45772 return false;
45775 else if (GET_MODE_SIZE (d->vmode) == 64)
45777 if (!TARGET_AVX512BW)
45778 return false;
45779 if (vmode == V64QImode)
45781 for (i = 0; i < nelt; ++i)
45782 if ((d->perm[i] ^ i) & (nelt / 4))
45783 return false;
45786 else
45787 return false;
45790 if (d->testing_p)
45791 return true;
45793 if (vmode == V8SImode)
45794 for (i = 0; i < 8; ++i)
45795 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
45796 else
45798 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
45799 if (!d->one_operand_p)
45800 mask = 2 * nelt - 1;
45801 else if (vmode == V16QImode)
45802 mask = nelt - 1;
45803 else if (vmode == V64QImode)
45804 mask = nelt / 4 - 1;
45805 else
45806 mask = nelt / 2 - 1;
45808 for (i = 0; i < nelt; ++i)
45810 unsigned j, e = d->perm[i] & mask;
45811 for (j = 0; j < eltsz; ++j)
45812 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
45816 vperm = gen_rtx_CONST_VECTOR (vmode,
45817 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
45818 vperm = force_reg (vmode, vperm);
45820 target = d->target;
45821 if (d->vmode != vmode)
45822 target = gen_reg_rtx (vmode);
45823 op0 = gen_lowpart (vmode, d->op0);
45824 if (d->one_operand_p)
45826 if (vmode == V16QImode)
45827 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
45828 else if (vmode == V32QImode)
45829 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
45830 else if (vmode == V64QImode)
45831 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
45832 else if (vmode == V8SFmode)
45833 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
45834 else
45835 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
45837 else
45839 op1 = gen_lowpart (vmode, d->op1);
45840 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
45842 if (target != d->target)
45843 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
45845 return true;
45848 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
45849 in a single instruction. */
45851 static bool
45852 expand_vec_perm_1 (struct expand_vec_perm_d *d)
45854 unsigned i, nelt = d->nelt;
45855 unsigned char perm2[MAX_VECT_LEN];
45857 /* Check plain VEC_SELECT first, because AVX has instructions that could
45858 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
45859 input where SEL+CONCAT may not. */
45860 if (d->one_operand_p)
45862 int mask = nelt - 1;
45863 bool identity_perm = true;
45864 bool broadcast_perm = true;
45866 for (i = 0; i < nelt; i++)
45868 perm2[i] = d->perm[i] & mask;
45869 if (perm2[i] != i)
45870 identity_perm = false;
45871 if (perm2[i])
45872 broadcast_perm = false;
45875 if (identity_perm)
45877 if (!d->testing_p)
45878 emit_move_insn (d->target, d->op0);
45879 return true;
45881 else if (broadcast_perm && TARGET_AVX2)
45883 /* Use vpbroadcast{b,w,d}. */
45884 rtx (*gen) (rtx, rtx) = NULL;
45885 switch (d->vmode)
45887 case V64QImode:
45888 if (TARGET_AVX512VL)
45889 gen = gen_avx512bw_vec_dupv64qi;
45890 break;
45891 case V32QImode:
45892 gen = gen_avx2_pbroadcastv32qi_1;
45893 break;
45894 case V32HImode:
45895 if (TARGET_AVX512VL)
45896 gen = gen_avx512bw_vec_dupv32hi;
45897 break;
45898 case V16HImode:
45899 gen = gen_avx2_pbroadcastv16hi_1;
45900 break;
45901 case V16SImode:
45902 if (TARGET_AVX512F)
45903 gen = gen_avx512f_vec_dupv16si;
45904 break;
45905 case V8SImode:
45906 gen = gen_avx2_pbroadcastv8si_1;
45907 break;
45908 case V16QImode:
45909 gen = gen_avx2_pbroadcastv16qi;
45910 break;
45911 case V8HImode:
45912 gen = gen_avx2_pbroadcastv8hi;
45913 break;
45914 case V16SFmode:
45915 if (TARGET_AVX512F)
45916 gen = gen_avx512f_vec_dupv16sf;
45917 break;
45918 case V8SFmode:
45919 gen = gen_avx2_vec_dupv8sf_1;
45920 break;
45921 case V8DFmode:
45922 if (TARGET_AVX512F)
45923 gen = gen_avx512f_vec_dupv8df;
45924 break;
45925 case V8DImode:
45926 if (TARGET_AVX512F)
45927 gen = gen_avx512f_vec_dupv8di;
45928 break;
45929 /* For other modes, prefer the other shuffles this function creates. */
45930 default: break;
45932 if (gen != NULL)
45934 if (!d->testing_p)
45935 emit_insn (gen (d->target, d->op0));
45936 return true;
45940 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
45941 return true;
45943 /* There are plenty of patterns in sse.md that are written for
45944 SEL+CONCAT and are not replicated for a single op. Perhaps
45945 that should be changed, to avoid the nastiness here. */
45947 /* Recognize interleave style patterns, which means incrementing
45948 every other permutation operand. */
45949 for (i = 0; i < nelt; i += 2)
45951 perm2[i] = d->perm[i] & mask;
45952 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
45954 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
45955 d->testing_p))
45956 return true;
45958 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
45959 if (nelt >= 4)
45961 for (i = 0; i < nelt; i += 4)
45963 perm2[i + 0] = d->perm[i + 0] & mask;
45964 perm2[i + 1] = d->perm[i + 1] & mask;
45965 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
45966 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
45969 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
45970 d->testing_p))
45971 return true;
45975 /* Finally, try the fully general two operand permute. */
45976 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
45977 d->testing_p))
45978 return true;
45980 /* Recognize interleave style patterns with reversed operands. */
45981 if (!d->one_operand_p)
45983 for (i = 0; i < nelt; ++i)
45985 unsigned e = d->perm[i];
45986 if (e >= nelt)
45987 e -= nelt;
45988 else
45989 e += nelt;
45990 perm2[i] = e;
45993 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
45994 d->testing_p))
45995 return true;
45998 /* Try the SSE4.1 blend variable merge instructions. */
45999 if (expand_vec_perm_blend (d))
46000 return true;
46002 /* Try one of the AVX vpermil variable permutations. */
46003 if (expand_vec_perm_vpermil (d))
46004 return true;
46006 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
46007 vpshufb, vpermd, vpermps or vpermq variable permutation. */
46008 if (expand_vec_perm_pshufb (d))
46009 return true;
46011 /* Try the AVX512F vpermi2 instructions. */
46012 rtx vec[64];
46013 enum machine_mode mode = d->vmode;
46014 if (mode == V8DFmode)
46015 mode = V8DImode;
46016 else if (mode == V16SFmode)
46017 mode = V16SImode;
46018 else if (mode == V4DFmode)
46019 mode = V4DImode;
46020 else if (mode == V2DFmode)
46021 mode = V2DImode;
46022 else if (mode == V8SFmode)
46023 mode = V8SImode;
46024 else if (mode == V4SFmode)
46025 mode = V4SImode;
46026 for (i = 0; i < nelt; ++i)
46027 vec[i] = GEN_INT (d->perm[i]);
46028 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
46029 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
46030 return true;
46032 return false;
46035 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
46036 in terms of a pair of pshuflw + pshufhw instructions. */
46038 static bool
46039 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
46041 unsigned char perm2[MAX_VECT_LEN];
46042 unsigned i;
46043 bool ok;
46045 if (d->vmode != V8HImode || !d->one_operand_p)
46046 return false;
46048 /* The two permutations only operate in 64-bit lanes. */
46049 for (i = 0; i < 4; ++i)
46050 if (d->perm[i] >= 4)
46051 return false;
46052 for (i = 4; i < 8; ++i)
46053 if (d->perm[i] < 4)
46054 return false;
46056 if (d->testing_p)
46057 return true;
46059 /* Emit the pshuflw. */
46060 memcpy (perm2, d->perm, 4);
46061 for (i = 4; i < 8; ++i)
46062 perm2[i] = i;
46063 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
46064 gcc_assert (ok);
46066 /* Emit the pshufhw. */
46067 memcpy (perm2 + 4, d->perm + 4, 4);
46068 for (i = 0; i < 4; ++i)
46069 perm2[i] = i;
46070 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
46071 gcc_assert (ok);
46073 return true;
46076 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
46077 the permutation using the SSSE3 palignr instruction. This succeeds
46078 when all of the elements in PERM fit within one vector and we merely
46079 need to shift them down so that a single vector permutation has a
46080 chance to succeed. */
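/* Illustration: for a V8HImode selector {3,4,5,6,7,8,9,10}, min is 3 and
   max - min is 7 < nelt, so palignr shifts the concatenation of op1:op0
   right by 3 * 16 bits; the remaining single-operand permutation is the
   identity and is handled by the in_order case below.  */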
46082 static bool
46083 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
46085 unsigned i, nelt = d->nelt;
46086 unsigned min, max;
46087 bool in_order, ok;
46088 rtx shift, target;
46089 struct expand_vec_perm_d dcopy;
46091 /* Even with AVX, palignr only operates on 128-bit vectors. */
46092 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
46093 return false;
46095 min = nelt, max = 0;
46096 for (i = 0; i < nelt; ++i)
46098 unsigned e = d->perm[i];
46099 if (e < min)
46100 min = e;
46101 if (e > max)
46102 max = e;
46104 if (min == 0 || max - min >= nelt)
46105 return false;
46107 /* Given that we have SSSE3, we know we'll be able to implement the
46108 single operand permutation after the palignr with pshufb. */
46109 if (d->testing_p)
46110 return true;
46112 dcopy = *d;
46113 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
46114 target = gen_reg_rtx (TImode);
46115 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
46116 gen_lowpart (TImode, d->op0), shift));
46118 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
46119 dcopy.one_operand_p = true;
46121 in_order = true;
46122 for (i = 0; i < nelt; ++i)
46124 unsigned e = dcopy.perm[i] - min;
46125 if (e != i)
46126 in_order = false;
46127 dcopy.perm[i] = e;
46130 /* Test for the degenerate case where the alignment by itself
46131 produces the desired permutation. */
46132 if (in_order)
46134 emit_move_insn (d->target, dcopy.op0);
46135 return true;
46138 ok = expand_vec_perm_1 (&dcopy);
46139 gcc_assert (ok);
46141 return ok;
46144 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
46145 the permutation using the SSE4_1 pblendv instruction. Potentially
46146 reduces the permutation from 2 pshufb and an or to 1 pshufb and a pblendv. */
46148 static bool
46149 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
46151 unsigned i, which, nelt = d->nelt;
46152 struct expand_vec_perm_d dcopy, dcopy1;
46153 enum machine_mode vmode = d->vmode;
46154 bool ok;
46156 /* Use the same checks as in expand_vec_perm_blend, but skipping
46157 AVX and AVX2 as they require more than 2 instructions. */
46158 if (d->one_operand_p)
46159 return false;
46160 if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
46162 else
46163 return false;
46165 /* Figure out which permutation elements do not stay in their
46166 respective lanes. */
46167 for (i = 0, which = 0; i < nelt; ++i)
46169 unsigned e = d->perm[i];
46170 if (e != i)
46171 which |= (e < nelt ? 1 : 2);
46173 /* We can pblend the part where elements do not stay in their
46174 respective lanes only when these elements all come from one
46175 half of the permutation.
46176 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
46177 lanes, but both are >= 8.
46178 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
46179 respective lanes, and 8 >= 8 but 2 is not. */
46180 if (which != 1 && which != 2)
46181 return false;
46182 if (d->testing_p)
46183 return true;
46185 /* First we apply a one-operand permutation to the part whose
46186 elements do not stay in their respective lanes. */
46187 dcopy = *d;
46188 if (which == 2)
46189 dcopy.op0 = dcopy.op1 = d->op1;
46190 else
46191 dcopy.op0 = dcopy.op1 = d->op0;
46192 dcopy.one_operand_p = true;
46194 for (i = 0; i < nelt; ++i)
46195 dcopy.perm[i] = d->perm[i] & (nelt - 1);
46197 ok = expand_vec_perm_1 (&dcopy);
46198 gcc_assert (ok);
46200 /* Next we put permuted elements into their positions. */
46201 dcopy1 = *d;
46202 if (which == 2)
46203 dcopy1.op1 = dcopy.target;
46204 else
46205 dcopy1.op0 = dcopy.target;
46207 for (i = 0; i < nelt; ++i)
46208 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
46210 ok = expand_vec_perm_blend (&dcopy1);
46211 gcc_assert (ok);
46213 return true;
46216 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
46218 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
46219 a two vector permutation into a single vector permutation by using
46220 an interleave operation to merge the vectors. */
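/* A sketch of the idea (illustration only): for V8HImode with selector
   {0, 8, 2, 10, 1, 9, 3, 11}, every element comes from the low half of
   one operand, so punpcklwd of op0 and op1 produces the intermediate
   {0, 8, 1, 9, 2, 10, 3, 11} in one register; the original permutation
   is then the one-operand shuffle {0, 1, 4, 5, 2, 3, 6, 7} of that
   intermediate, which a single pshufd-style shuffle can realize.  */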
46222 static bool
46223 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
46225 struct expand_vec_perm_d dremap, dfinal;
46226 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
46227 unsigned HOST_WIDE_INT contents;
46228 unsigned char remap[2 * MAX_VECT_LEN];
46229 rtx seq;
46230 bool ok, same_halves = false;
46232 if (GET_MODE_SIZE (d->vmode) == 16)
46234 if (d->one_operand_p)
46235 return false;
46237 else if (GET_MODE_SIZE (d->vmode) == 32)
46239 if (!TARGET_AVX)
46240 return false;
46241 /* For 32-byte modes allow even d->one_operand_p.
46242 The lack of cross-lane shuffling in some instructions
46243 might prevent a single insn shuffle. */
46244 dfinal = *d;
46245 dfinal.testing_p = true;
46246 /* If expand_vec_perm_interleave3 can expand this into
46247 a 3 insn sequence, give up and let it be expanded as
46248 a 3 insn sequence. While that is one insn longer, it
46249 doesn't need a memory operand, and in the common case
46250 where the interleave low and interleave high permutations
46251 with the same operands are adjacent, the pair needs only
46252 4 insns after CSE. */
46253 if (expand_vec_perm_interleave3 (&dfinal))
46254 return false;
46256 else
46257 return false;
46259 /* Examine from whence the elements come. */
46260 contents = 0;
46261 for (i = 0; i < nelt; ++i)
46262 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
46264 memset (remap, 0xff, sizeof (remap));
46265 dremap = *d;
46267 if (GET_MODE_SIZE (d->vmode) == 16)
46269 unsigned HOST_WIDE_INT h1, h2, h3, h4;
46271 /* Split the two input vectors into 4 halves. */
46272 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
46273 h2 = h1 << nelt2;
46274 h3 = h2 << nelt2;
46275 h4 = h3 << nelt2;
46277 /* If the elements come from the low halves, use interleave low; similarly,
46278 use interleave high for the high halves. If the elements are from
46279 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
46280 if ((contents & (h1 | h3)) == contents)
46282 /* punpckl* */
46283 for (i = 0; i < nelt2; ++i)
46285 remap[i] = i * 2;
46286 remap[i + nelt] = i * 2 + 1;
46287 dremap.perm[i * 2] = i;
46288 dremap.perm[i * 2 + 1] = i + nelt;
46290 if (!TARGET_SSE2 && d->vmode == V4SImode)
46291 dremap.vmode = V4SFmode;
46293 else if ((contents & (h2 | h4)) == contents)
46295 /* punpckh* */
46296 for (i = 0; i < nelt2; ++i)
46298 remap[i + nelt2] = i * 2;
46299 remap[i + nelt + nelt2] = i * 2 + 1;
46300 dremap.perm[i * 2] = i + nelt2;
46301 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
46303 if (!TARGET_SSE2 && d->vmode == V4SImode)
46304 dremap.vmode = V4SFmode;
46306 else if ((contents & (h1 | h4)) == contents)
46308 /* shufps */
46309 for (i = 0; i < nelt2; ++i)
46311 remap[i] = i;
46312 remap[i + nelt + nelt2] = i + nelt2;
46313 dremap.perm[i] = i;
46314 dremap.perm[i + nelt2] = i + nelt + nelt2;
46316 if (nelt != 4)
46318 /* shufpd */
46319 dremap.vmode = V2DImode;
46320 dremap.nelt = 2;
46321 dremap.perm[0] = 0;
46322 dremap.perm[1] = 3;
46325 else if ((contents & (h2 | h3)) == contents)
46327 /* shufps */
46328 for (i = 0; i < nelt2; ++i)
46330 remap[i + nelt2] = i;
46331 remap[i + nelt] = i + nelt2;
46332 dremap.perm[i] = i + nelt2;
46333 dremap.perm[i + nelt2] = i + nelt;
46335 if (nelt != 4)
46337 /* shufpd */
46338 dremap.vmode = V2DImode;
46339 dremap.nelt = 2;
46340 dremap.perm[0] = 1;
46341 dremap.perm[1] = 2;
46344 else
46345 return false;
46347 else
46349 unsigned int nelt4 = nelt / 4, nzcnt = 0;
46350 unsigned HOST_WIDE_INT q[8];
46351 unsigned int nonzero_halves[4];
46353 /* Split the two input vectors into 8 quarters. */
46354 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
46355 for (i = 1; i < 8; ++i)
46356 q[i] = q[0] << (nelt4 * i);
46357 for (i = 0; i < 4; ++i)
46358 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
46360 nonzero_halves[nzcnt] = i;
46361 ++nzcnt;
46364 if (nzcnt == 1)
46366 gcc_assert (d->one_operand_p);
46367 nonzero_halves[1] = nonzero_halves[0];
46368 same_halves = true;
46370 else if (d->one_operand_p)
46372 gcc_assert (nonzero_halves[0] == 0);
46373 gcc_assert (nonzero_halves[1] == 1);
46376 if (nzcnt <= 2)
46378 if (d->perm[0] / nelt2 == nonzero_halves[1])
46380 /* Attempt to increase the likelihood that dfinal
46381 shuffle will be intra-lane. */
46382 char tmph = nonzero_halves[0];
46383 nonzero_halves[0] = nonzero_halves[1];
46384 nonzero_halves[1] = tmph;
46387 /* vperm2f128 or vperm2i128. */
46388 for (i = 0; i < nelt2; ++i)
46390 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
46391 remap[i + nonzero_halves[0] * nelt2] = i;
46392 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
46393 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
46396 if (d->vmode != V8SFmode
46397 && d->vmode != V4DFmode
46398 && d->vmode != V8SImode)
46400 dremap.vmode = V8SImode;
46401 dremap.nelt = 8;
46402 for (i = 0; i < 4; ++i)
46404 dremap.perm[i] = i + nonzero_halves[0] * 4;
46405 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
46409 else if (d->one_operand_p)
46410 return false;
46411 else if (TARGET_AVX2
46412 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
46414 /* vpunpckl* */
46415 for (i = 0; i < nelt4; ++i)
46417 remap[i] = i * 2;
46418 remap[i + nelt] = i * 2 + 1;
46419 remap[i + nelt2] = i * 2 + nelt2;
46420 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
46421 dremap.perm[i * 2] = i;
46422 dremap.perm[i * 2 + 1] = i + nelt;
46423 dremap.perm[i * 2 + nelt2] = i + nelt2;
46424 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
46427 else if (TARGET_AVX2
46428 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
46430 /* vpunpckh* */
46431 for (i = 0; i < nelt4; ++i)
46433 remap[i + nelt4] = i * 2;
46434 remap[i + nelt + nelt4] = i * 2 + 1;
46435 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
46436 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
46437 dremap.perm[i * 2] = i + nelt4;
46438 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
46439 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
46440 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
46443 else
46444 return false;
46447 /* Use the remapping array set up above to move the elements from their
46448 swizzled locations into their final destinations. */
46449 dfinal = *d;
46450 for (i = 0; i < nelt; ++i)
46452 unsigned e = remap[d->perm[i]];
46453 gcc_assert (e < nelt);
46454 /* If same_halves is true, both halves of the remapped vector are the
46455 same. Avoid cross-lane accesses if possible. */
46456 if (same_halves && i >= nelt2)
46458 gcc_assert (e < nelt2);
46459 dfinal.perm[i] = e + nelt2;
46461 else
46462 dfinal.perm[i] = e;
46464 if (!d->testing_p)
46466 dremap.target = gen_reg_rtx (dremap.vmode);
46467 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
46469 dfinal.op1 = dfinal.op0;
46470 dfinal.one_operand_p = true;
46472 /* Test if the final remap can be done with a single insn. For V4SFmode or
46473 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
46474 start_sequence ();
46475 ok = expand_vec_perm_1 (&dfinal);
46476 seq = get_insns ();
46477 end_sequence ();
46479 if (!ok)
46480 return false;
46482 if (d->testing_p)
46483 return true;
46485 if (dremap.vmode != dfinal.vmode)
46487 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
46488 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
46491 ok = expand_vec_perm_1 (&dremap);
46492 gcc_assert (ok);
46494 emit_insn (seq);
46495 return true;
46498 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
46499 a single vector cross-lane permutation into vpermq followed
46500 by any of the single insn permutations. */
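/* For example, a V32QImode permutation whose low 16 result bytes use
   only 64-bit quarters 0 and 3 of the source and whose high 16 bytes
   use only quarters 1 and 2 can be handled by a vpermq with selector
   { 0, 3, 1, 2 } followed by an in-lane shuffle such as vpshufb.  */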
46502 static bool
46503 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
46505 struct expand_vec_perm_d dremap, dfinal;
46506 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
46507 unsigned contents[2];
46508 bool ok;
46510 if (!(TARGET_AVX2
46511 && (d->vmode == V32QImode || d->vmode == V16HImode)
46512 && d->one_operand_p))
46513 return false;
46515 contents[0] = 0;
46516 contents[1] = 0;
46517 for (i = 0; i < nelt2; ++i)
46519 contents[0] |= 1u << (d->perm[i] / nelt4);
46520 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
46523 for (i = 0; i < 2; ++i)
46525 unsigned int cnt = 0;
46526 for (j = 0; j < 4; ++j)
46527 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
46528 return false;
46531 if (d->testing_p)
46532 return true;
46534 dremap = *d;
46535 dremap.vmode = V4DImode;
46536 dremap.nelt = 4;
46537 dremap.target = gen_reg_rtx (V4DImode);
46538 dremap.op0 = gen_lowpart (V4DImode, d->op0);
46539 dremap.op1 = dremap.op0;
46540 dremap.one_operand_p = true;
46541 for (i = 0; i < 2; ++i)
46543 unsigned int cnt = 0;
46544 for (j = 0; j < 4; ++j)
46545 if ((contents[i] & (1u << j)) != 0)
46546 dremap.perm[2 * i + cnt++] = j;
46547 for (; cnt < 2; ++cnt)
46548 dremap.perm[2 * i + cnt] = 0;
46551 dfinal = *d;
46552 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
46553 dfinal.op1 = dfinal.op0;
46554 dfinal.one_operand_p = true;
46555 for (i = 0, j = 0; i < nelt; ++i)
46557 if (i == nelt2)
46558 j = 2;
46559 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
46560 if ((d->perm[i] / nelt4) == dremap.perm[j])
46562 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
46563 dfinal.perm[i] |= nelt4;
46564 else
46565 gcc_unreachable ();
46568 ok = expand_vec_perm_1 (&dremap);
46569 gcc_assert (ok);
46571 ok = expand_vec_perm_1 (&dfinal);
46572 gcc_assert (ok);
46574 return true;
46577 /* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
46578 a vector permutation using two instructions, vperm2f128 resp.
46579 vperm2i128 followed by any single in-lane permutation. */
46581 static bool
46582 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
46584 struct expand_vec_perm_d dfirst, dsecond;
46585 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
46586 bool ok;
46588 if (!TARGET_AVX
46589 || GET_MODE_SIZE (d->vmode) != 32
46590 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
46591 return false;
46593 dsecond = *d;
46594 dsecond.one_operand_p = false;
46595 dsecond.testing_p = true;
46597 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
46598 immediate. For perm < 16 the second permutation uses
46599 d->op0 as first operand, for perm >= 16 it uses d->op1
46600 as first operand. The second operand is the result of
46601 vperm2[fi]128. */
46602 for (perm = 0; perm < 32; perm++)
46604 /* Ignore permutations which do not move anything cross-lane. */
46605 if (perm < 16)
46607 /* The second shuffle for e.g. V4DFmode has
46608 0123 and ABCD operands.
46609 Ignore AB23, as 23 is already in the second lane
46610 of the first operand. */
46611 if ((perm & 0xc) == (1 << 2)) continue;
46612 /* And 01CD, as 01 is in the first lane of the first
46613 operand. */
46614 if ((perm & 3) == 0) continue;
46615 /* And 4567, as then the vperm2[fi]128 doesn't change
46616 anything on the original 4567 second operand. */
46617 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
46619 else
46621 /* The second shuffle for e.g. V4DFmode has
46622 4567 and ABCD operands.
46623 Ignore AB67, as 67 is already in the second lane
46624 of the first operand. */
46625 if ((perm & 0xc) == (3 << 2)) continue;
46626 /* And 45CD, as 45 is in the first lane of the first
46627 operand. */
46628 if ((perm & 3) == 2) continue;
46629 /* And 0123, as then the vperm2[fi]128 doesn't change
46630 anything on the original 0123 first operand. */
46631 if ((perm & 0xf) == (1 << 2)) continue;
46634 for (i = 0; i < nelt; i++)
46636 j = d->perm[i] / nelt2;
46637 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
46638 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
46639 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
46640 dsecond.perm[i] = d->perm[i] & (nelt - 1);
46641 else
46642 break;
46645 if (i == nelt)
46647 start_sequence ();
46648 ok = expand_vec_perm_1 (&dsecond);
46649 end_sequence ();
46651 else
46652 ok = false;
46654 if (ok)
46656 if (d->testing_p)
46657 return true;
46659 /* Found a usable second shuffle. dfirst will be
46660 vperm2f128 on d->op0 and d->op1. */
46661 dsecond.testing_p = false;
46662 dfirst = *d;
46663 dfirst.target = gen_reg_rtx (d->vmode);
46664 for (i = 0; i < nelt; i++)
46665 dfirst.perm[i] = (i & (nelt2 - 1))
46666 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
46668 ok = expand_vec_perm_1 (&dfirst);
46669 gcc_assert (ok);
46671 /* And dsecond is some single insn shuffle, taking
46672 d->op0 and result of vperm2f128 (if perm < 16) or
46673 d->op1 and result of vperm2f128 (otherwise). */
46674 dsecond.op1 = dfirst.target;
46675 if (perm >= 16)
46676 dsecond.op0 = dfirst.op1;
46678 ok = expand_vec_perm_1 (&dsecond);
46679 gcc_assert (ok);
46681 return true;
46684 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
46685 if (d->one_operand_p)
46686 return false;
46689 return false;
46692 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
46693 a two vector permutation using 2 intra-lane interleave insns
46694 and cross-lane shuffle for 32-byte vectors. */
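/* The accepted permutations are exactly the interleave patterns, e.g.
   for V8SImode { 0, 8, 1, 9, 2, 10, 3, 11 } (interleave low) or
   { 4, 12, 5, 13, 6, 14, 7, 15 } (interleave high).  */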
46696 static bool
46697 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
46699 unsigned i, nelt;
46700 rtx (*gen) (rtx, rtx, rtx);
46702 if (d->one_operand_p)
46703 return false;
46704 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
46706 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
46708 else
46709 return false;
46711 nelt = d->nelt;
46712 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
46713 return false;
46714 for (i = 0; i < nelt; i += 2)
46715 if (d->perm[i] != d->perm[0] + i / 2
46716 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
46717 return false;
46719 if (d->testing_p)
46720 return true;
46722 switch (d->vmode)
46724 case V32QImode:
46725 if (d->perm[0])
46726 gen = gen_vec_interleave_highv32qi;
46727 else
46728 gen = gen_vec_interleave_lowv32qi;
46729 break;
46730 case V16HImode:
46731 if (d->perm[0])
46732 gen = gen_vec_interleave_highv16hi;
46733 else
46734 gen = gen_vec_interleave_lowv16hi;
46735 break;
46736 case V8SImode:
46737 if (d->perm[0])
46738 gen = gen_vec_interleave_highv8si;
46739 else
46740 gen = gen_vec_interleave_lowv8si;
46741 break;
46742 case V4DImode:
46743 if (d->perm[0])
46744 gen = gen_vec_interleave_highv4di;
46745 else
46746 gen = gen_vec_interleave_lowv4di;
46747 break;
46748 case V8SFmode:
46749 if (d->perm[0])
46750 gen = gen_vec_interleave_highv8sf;
46751 else
46752 gen = gen_vec_interleave_lowv8sf;
46753 break;
46754 case V4DFmode:
46755 if (d->perm[0])
46756 gen = gen_vec_interleave_highv4df;
46757 else
46758 gen = gen_vec_interleave_lowv4df;
46759 break;
46760 default:
46761 gcc_unreachable ();
46764 emit_insn (gen (d->target, d->op0, d->op1));
46765 return true;
46768 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
46769 a single vector permutation using a single intra-lane vector
46770 permutation, vperm2f128 swapping the lanes and vblend* insn blending
46771 the non-swapped and swapped vectors together. */
46773 static bool
46774 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
46776 struct expand_vec_perm_d dfirst, dsecond;
46777 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
46778 rtx seq;
46779 bool ok;
46780 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
46782 if (!TARGET_AVX
46783 || TARGET_AVX2
46784 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
46785 || !d->one_operand_p)
46786 return false;
46788 dfirst = *d;
46789 for (i = 0; i < nelt; i++)
46790 dfirst.perm[i] = 0xff;
46791 for (i = 0, msk = 0; i < nelt; i++)
46793 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
46794 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
46795 return false;
46796 dfirst.perm[j] = d->perm[i];
46797 if (j != i)
46798 msk |= (1 << i);
46800 for (i = 0; i < nelt; i++)
46801 if (dfirst.perm[i] == 0xff)
46802 dfirst.perm[i] = i;
46804 if (!d->testing_p)
46805 dfirst.target = gen_reg_rtx (dfirst.vmode);
46807 start_sequence ();
46808 ok = expand_vec_perm_1 (&dfirst);
46809 seq = get_insns ();
46810 end_sequence ();
46812 if (!ok)
46813 return false;
46815 if (d->testing_p)
46816 return true;
46818 emit_insn (seq);
46820 dsecond = *d;
46821 dsecond.op0 = dfirst.target;
46822 dsecond.op1 = dfirst.target;
46823 dsecond.one_operand_p = true;
46824 dsecond.target = gen_reg_rtx (dsecond.vmode);
46825 for (i = 0; i < nelt; i++)
46826 dsecond.perm[i] = i ^ nelt2;
46828 ok = expand_vec_perm_1 (&dsecond);
46829 gcc_assert (ok);
46831 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
46832 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
46833 return true;
46836 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
46837 permutation using two vperm2f128, followed by a vshufpd insn blending
46838 the two vectors together. */
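/* For instance, the V4DFmode permutation { 2, 5, 1, 6 } is built as
   dfirst = { 2, 3, 0, 1 }, dsecond = { 4, 5, 6, 7 } and a final
   shuffle selecting { 0, 5, 3, 6 } from the two intermediate results.  */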
46840 static bool
46841 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
46843 struct expand_vec_perm_d dfirst, dsecond, dthird;
46844 bool ok;
46846 if (!TARGET_AVX || (d->vmode != V4DFmode))
46847 return false;
46849 if (d->testing_p)
46850 return true;
46852 dfirst = *d;
46853 dsecond = *d;
46854 dthird = *d;
46856 dfirst.perm[0] = (d->perm[0] & ~1);
46857 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
46858 dfirst.perm[2] = (d->perm[2] & ~1);
46859 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
46860 dsecond.perm[0] = (d->perm[1] & ~1);
46861 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
46862 dsecond.perm[2] = (d->perm[3] & ~1);
46863 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
46864 dthird.perm[0] = (d->perm[0] % 2);
46865 dthird.perm[1] = (d->perm[1] % 2) + 4;
46866 dthird.perm[2] = (d->perm[2] % 2) + 2;
46867 dthird.perm[3] = (d->perm[3] % 2) + 6;
46869 dfirst.target = gen_reg_rtx (dfirst.vmode);
46870 dsecond.target = gen_reg_rtx (dsecond.vmode);
46871 dthird.op0 = dfirst.target;
46872 dthird.op1 = dsecond.target;
46873 dthird.one_operand_p = false;
46875 canonicalize_perm (&dfirst);
46876 canonicalize_perm (&dsecond);
46878 ok = expand_vec_perm_1 (&dfirst)
46879 && expand_vec_perm_1 (&dsecond)
46880 && expand_vec_perm_1 (&dthird);
46882 gcc_assert (ok);
46884 return true;
46887 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
46888 permutation with two pshufb insns and an ior. We should have already
46889 failed all two instruction sequences. */
46891 static bool
46892 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
46894 rtx rperm[2][16], vperm, l, h, op, m128;
46895 unsigned int i, nelt, eltsz;
46897 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
46898 return false;
46899 gcc_assert (!d->one_operand_p);
46901 if (d->testing_p)
46902 return true;
46904 nelt = d->nelt;
46905 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
46907 /* Generate two permutation masks. If the required element is within
46908 the given vector it is shuffled into the proper lane. If the required
46909 element is in the other vector, force a zero into the lane by setting
46910 bit 7 in the permutation mask. */
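/* E.g. for V16QImode, if result byte 0 should be element 20 (byte 4 of
   op1), the op1 mask gets 4 in byte 0 while the op0 mask gets -128
   there, so the two pshufb results OR together cleanly.  */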
46911 m128 = GEN_INT (-128);
46912 for (i = 0; i < nelt; ++i)
46914 unsigned j, e = d->perm[i];
46915 unsigned which = (e >= nelt);
46916 if (e >= nelt)
46917 e -= nelt;
46919 for (j = 0; j < eltsz; ++j)
46921 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
46922 rperm[1-which][i*eltsz + j] = m128;
46926 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
46927 vperm = force_reg (V16QImode, vperm);
46929 l = gen_reg_rtx (V16QImode);
46930 op = gen_lowpart (V16QImode, d->op0);
46931 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
46933 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
46934 vperm = force_reg (V16QImode, vperm);
46936 h = gen_reg_rtx (V16QImode);
46937 op = gen_lowpart (V16QImode, d->op1);
46938 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
46940 op = d->target;
46941 if (d->vmode != V16QImode)
46942 op = gen_reg_rtx (V16QImode);
46943 emit_insn (gen_iorv16qi3 (op, l, h));
46944 if (op != d->target)
46945 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
46947 return true;
46950 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
46951 with two vpshufb insns, vpermq and vpor. We should have already failed
46952 all two or three instruction sequences. */
46954 static bool
46955 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
46957 rtx rperm[2][32], vperm, l, h, hp, op, m128;
46958 unsigned int i, nelt, eltsz;
46960 if (!TARGET_AVX2
46961 || !d->one_operand_p
46962 || (d->vmode != V32QImode && d->vmode != V16HImode))
46963 return false;
46965 if (d->testing_p)
46966 return true;
46968 nelt = d->nelt;
46969 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
46971 /* Generate two permutation masks. If the required element is within
46972 the same lane, it is shuffled in. If the required element is from the
46973 other lane, force a zero into the lane by setting bit 7 in the
46974 permutation mask. The other mask has non-negative elements whenever
46975 an element is requested from the other lane, but those elements are
46976 also moved to the other lane, so that the result of vpshufb can have
46977 its two V2TImode halves swapped. */
46978 m128 = GEN_INT (-128);
46979 for (i = 0; i < nelt; ++i)
46981 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
46982 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
46984 for (j = 0; j < eltsz; ++j)
46986 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
46987 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
46991 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
46992 vperm = force_reg (V32QImode, vperm);
46994 h = gen_reg_rtx (V32QImode);
46995 op = gen_lowpart (V32QImode, d->op0);
46996 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
46999 /* Swap the 128-bit lanes of h into hp. */
46999 hp = gen_reg_rtx (V4DImode);
47000 op = gen_lowpart (V4DImode, h);
47001 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
47002 const1_rtx));
47004 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
47005 vperm = force_reg (V32QImode, vperm);
47007 l = gen_reg_rtx (V32QImode);
47008 op = gen_lowpart (V32QImode, d->op0);
47009 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
47011 op = d->target;
47012 if (d->vmode != V32QImode)
47013 op = gen_reg_rtx (V32QImode);
47014 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
47015 if (op != d->target)
47016 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
47018 return true;
47021 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
47022 and extract-odd permutations of two V32QImode or V16HImode operands
47023 with two vpshufb insns, vpor and vpermq. We should have already
47024 failed all two or three instruction sequences. */
47026 static bool
47027 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
47029 rtx rperm[2][32], vperm, l, h, ior, op, m128;
47030 unsigned int i, nelt, eltsz;
47032 if (!TARGET_AVX2
47033 || d->one_operand_p
47034 || (d->vmode != V32QImode && d->vmode != V16HImode))
47035 return false;
47037 for (i = 0; i < d->nelt; ++i)
47038 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
47039 return false;
47041 if (d->testing_p)
47042 return true;
47044 nelt = d->nelt;
47045 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47047 /* Generate two permutation masks. In the first permutation mask
47048 the first quarter will contain indexes for the first half
47049 of the op0, the second quarter will contain bit 7 set, third quarter
47050 will contain indexes for the second half of the op0 and the
47051 last quarter bit 7 set. In the second permutation mask
47052 the first quarter will contain bit 7 set, the second quarter
47053 indexes for the first half of the op1, the third quarter bit 7 set
47054 and last quarter indexes for the second half of the op1.
47055 I.e. the first mask e.g. for V32QImode extract even will be:
47056 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
47057 (all values masked with 0xf except for -128) and second mask
47058 for extract even will be
47059 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
47060 m128 = GEN_INT (-128);
47061 for (i = 0; i < nelt; ++i)
47063 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
47064 unsigned which = d->perm[i] >= nelt;
47065 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
47067 for (j = 0; j < eltsz; ++j)
47069 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
47070 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
47074 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
47075 vperm = force_reg (V32QImode, vperm);
47077 l = gen_reg_rtx (V32QImode);
47078 op = gen_lowpart (V32QImode, d->op0);
47079 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
47081 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
47082 vperm = force_reg (V32QImode, vperm);
47084 h = gen_reg_rtx (V32QImode);
47085 op = gen_lowpart (V32QImode, d->op1);
47086 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
47088 ior = gen_reg_rtx (V32QImode);
47089 emit_insn (gen_iorv32qi3 (ior, l, h));
47091 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
47092 op = gen_reg_rtx (V4DImode);
47093 ior = gen_lowpart (V4DImode, ior);
47094 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
47095 const1_rtx, GEN_INT (3)));
47096 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
47098 return true;
47101 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
47102 and extract-odd permutations. */
47104 static bool
47105 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
47107 rtx t1, t2, t3, t4, t5;
47109 switch (d->vmode)
47111 case V4DFmode:
47112 if (d->testing_p)
47113 break;
47114 t1 = gen_reg_rtx (V4DFmode);
47115 t2 = gen_reg_rtx (V4DFmode);
47117 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
47118 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
47119 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
47121 /* Now an unpck[lh]pd will produce the result required. */
47122 if (odd)
47123 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
47124 else
47125 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
47126 emit_insn (t3);
47127 break;
47129 case V8SFmode:
47131 int mask = odd ? 0xdd : 0x88;
47133 if (d->testing_p)
47134 break;
47135 t1 = gen_reg_rtx (V8SFmode);
47136 t2 = gen_reg_rtx (V8SFmode);
47137 t3 = gen_reg_rtx (V8SFmode);
47139 /* Shuffle within the 128-bit lanes to produce:
47140 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
47141 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
47142 GEN_INT (mask)));
47144 /* Shuffle the lanes around to produce:
47145 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
47146 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
47147 GEN_INT (0x3)));
47149 /* Shuffle within the 128-bit lanes to produce:
47150 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
47151 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
47153 /* Shuffle within the 128-bit lanes to produce:
47154 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
47155 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
47157 /* Shuffle the lanes around to produce:
47158 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
47159 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
47160 GEN_INT (0x20)));
47162 break;
47164 case V2DFmode:
47165 case V4SFmode:
47166 case V2DImode:
47167 case V4SImode:
47168 /* These are always directly implementable by expand_vec_perm_1. */
47169 gcc_unreachable ();
47171 case V8HImode:
47172 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
47173 return expand_vec_perm_pshufb2 (d);
47174 else
47176 if (d->testing_p)
47177 break;
47178 /* We need 2*log2(N)-1 operations to achieve odd/even
47179 with interleave. */
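/* For V8HImode that is 2*3 - 1 == 5 interleave insns, as emitted
   below.  */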
47180 t1 = gen_reg_rtx (V8HImode);
47181 t2 = gen_reg_rtx (V8HImode);
47182 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
47183 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
47184 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
47185 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
47186 if (odd)
47187 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
47188 else
47189 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
47190 emit_insn (t3);
47192 break;
47194 case V16QImode:
47195 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
47196 return expand_vec_perm_pshufb2 (d);
47197 else
47199 if (d->testing_p)
47200 break;
47201 t1 = gen_reg_rtx (V16QImode);
47202 t2 = gen_reg_rtx (V16QImode);
47203 t3 = gen_reg_rtx (V16QImode);
47204 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
47205 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
47206 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
47207 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
47208 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
47209 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
47210 if (odd)
47211 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
47212 else
47213 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
47214 emit_insn (t3);
47216 break;
47218 case V16HImode:
47219 case V32QImode:
47220 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
47222 case V4DImode:
47223 if (!TARGET_AVX2)
47225 struct expand_vec_perm_d d_copy = *d;
47226 d_copy.vmode = V4DFmode;
47227 if (d->testing_p)
47228 d_copy.target = gen_lowpart (V4DFmode, d->target);
47229 else
47230 d_copy.target = gen_reg_rtx (V4DFmode);
47231 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
47232 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
47233 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
47235 if (!d->testing_p)
47236 emit_move_insn (d->target,
47237 gen_lowpart (V4DImode, d_copy.target));
47238 return true;
47240 return false;
47243 if (d->testing_p)
47244 break;
47246 t1 = gen_reg_rtx (V4DImode);
47247 t2 = gen_reg_rtx (V4DImode);
47249 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
47250 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
47251 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
47253 /* Now an vpunpck[lh]qdq will produce the result required. */
47254 if (odd)
47255 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
47256 else
47257 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
47258 emit_insn (t3);
47259 break;
47261 case V8SImode:
47262 if (!TARGET_AVX2)
47264 struct expand_vec_perm_d d_copy = *d;
47265 d_copy.vmode = V8SFmode;
47266 if (d->testing_p)
47267 d_copy.target = gen_lowpart (V8SFmode, d->target);
47268 else
47269 d_copy.target = gen_reg_rtx (V8SFmode);
47270 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
47271 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
47272 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
47274 if (!d->testing_p)
47275 emit_move_insn (d->target,
47276 gen_lowpart (V8SImode, d_copy.target));
47277 return true;
47279 return false;
47282 if (d->testing_p)
47283 break;
47285 t1 = gen_reg_rtx (V8SImode);
47286 t2 = gen_reg_rtx (V8SImode);
47287 t3 = gen_reg_rtx (V4DImode);
47288 t4 = gen_reg_rtx (V4DImode);
47289 t5 = gen_reg_rtx (V4DImode);
47291 /* Shuffle the lanes around into
47292 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
47293 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
47294 gen_lowpart (V4DImode, d->op1),
47295 GEN_INT (0x20)));
47296 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
47297 gen_lowpart (V4DImode, d->op1),
47298 GEN_INT (0x31)));
47300 /* Swap the 2nd and 3rd position in each lane into
47301 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
47302 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
47303 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
47304 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
47305 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
47307 /* Now an vpunpck[lh]qdq will produce
47308 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
47309 if (odd)
47310 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
47311 gen_lowpart (V4DImode, t2));
47312 else
47313 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
47314 gen_lowpart (V4DImode, t2));
47315 emit_insn (t3);
47316 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
47317 break;
47319 default:
47320 gcc_unreachable ();
47323 return true;
47326 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
47327 extract-even and extract-odd permutations. */
47329 static bool
47330 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
47332 unsigned i, odd, nelt = d->nelt;
47334 odd = d->perm[0];
47335 if (odd != 0 && odd != 1)
47336 return false;
47338 for (i = 1; i < nelt; ++i)
47339 if (d->perm[i] != 2 * i + odd)
47340 return false;
47342 return expand_vec_perm_even_odd_1 (d, odd);
47345 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
47346 permutations. We assume that expand_vec_perm_1 has already failed. */
47348 static bool
47349 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
47351 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
47352 enum machine_mode vmode = d->vmode;
47353 unsigned char perm2[4];
47354 rtx op0 = d->op0, dest;
47355 bool ok;
47357 switch (vmode)
47359 case V4DFmode:
47360 case V8SFmode:
47361 /* These are special-cased in sse.md so that we can optionally
47362 use the vbroadcast instruction. They expand to two insns
47363 if the input happens to be in a register. */
47364 gcc_unreachable ();
47366 case V2DFmode:
47367 case V2DImode:
47368 case V4SFmode:
47369 case V4SImode:
47370 /* These are always implementable using standard shuffle patterns. */
47371 gcc_unreachable ();
47373 case V8HImode:
47374 case V16QImode:
47375 /* These can be implemented via interleave. We save one insn by
47376 stopping once we have promoted to V4SImode and then use pshufd. */
47377 if (d->testing_p)
47378 return true;
47381 rtx dest;
47382 rtx (*gen) (rtx, rtx, rtx)
47383 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
47384 : gen_vec_interleave_lowv8hi;
47386 if (elt >= nelt2)
47388 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
47389 : gen_vec_interleave_highv8hi;
47390 elt -= nelt2;
47392 nelt2 /= 2;
47394 dest = gen_reg_rtx (vmode);
47395 emit_insn (gen (dest, op0, op0));
47396 vmode = get_mode_wider_vector (vmode);
47397 op0 = gen_lowpart (vmode, dest);
47399 while (vmode != V4SImode);
47401 memset (perm2, elt, 4);
47402 dest = gen_reg_rtx (V4SImode);
47403 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
47404 gcc_assert (ok);
47405 if (!d->testing_p)
47406 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
47407 return true;
47409 case V32QImode:
47410 case V16HImode:
47411 case V8SImode:
47412 case V4DImode:
47413 /* For AVX2 broadcasts of the first element vpbroadcast* or
47414 vpermq should be used by expand_vec_perm_1. */
47415 gcc_assert (!TARGET_AVX2 || d->perm[0]);
47416 return false;
47418 default:
47419 gcc_unreachable ();
47423 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
47424 broadcast permutations. */
47426 static bool
47427 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
47429 unsigned i, elt, nelt = d->nelt;
47431 if (!d->one_operand_p)
47432 return false;
47434 elt = d->perm[0];
47435 for (i = 1; i < nelt; ++i)
47436 if (d->perm[i] != elt)
47437 return false;
47439 return expand_vec_perm_broadcast_1 (d);
47442 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
47443 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
47444 all the shorter instruction sequences. */
47446 static bool
47447 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
47449 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
47450 unsigned int i, nelt, eltsz;
47451 bool used[4];
47453 if (!TARGET_AVX2
47454 || d->one_operand_p
47455 || (d->vmode != V32QImode && d->vmode != V16HImode))
47456 return false;
47458 if (d->testing_p)
47459 return true;
47461 nelt = d->nelt;
47462 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47464 /* Generate 4 permutation masks. If the required element is within
47465 the same lane, it is shuffled in. If the required element is from the
47466 other lane, force a zero into the lane by setting bit 7 in the
47467 permutation mask. The other mask has non-negative elements whenever
47468 an element is requested from the other lane, but those elements are
47469 also moved to the other lane, so that the result of vpshufb can have
47470 its two V2TImode halves swapped. */
47471 m128 = GEN_INT (-128);
47472 for (i = 0; i < 32; ++i)
47474 rperm[0][i] = m128;
47475 rperm[1][i] = m128;
47476 rperm[2][i] = m128;
47477 rperm[3][i] = m128;
47479 used[0] = false;
47480 used[1] = false;
47481 used[2] = false;
47482 used[3] = false;
47483 for (i = 0; i < nelt; ++i)
47485 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
47486 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
47487 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
47489 for (j = 0; j < eltsz; ++j)
47490 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
47491 used[which] = true;
47494 for (i = 0; i < 2; ++i)
47496 if (!used[2 * i + 1])
47498 h[i] = NULL_RTX;
47499 continue;
47501 vperm = gen_rtx_CONST_VECTOR (V32QImode,
47502 gen_rtvec_v (32, rperm[2 * i + 1]));
47503 vperm = force_reg (V32QImode, vperm);
47504 h[i] = gen_reg_rtx (V32QImode);
47505 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
47506 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
47509 /* Swap the 128-bit lanes of h[X]. */
47510 for (i = 0; i < 2; ++i)
47512 if (h[i] == NULL_RTX)
47513 continue;
47514 op = gen_reg_rtx (V4DImode);
47515 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
47516 const2_rtx, GEN_INT (3), const0_rtx,
47517 const1_rtx));
47518 h[i] = gen_lowpart (V32QImode, op);
47521 for (i = 0; i < 2; ++i)
47523 if (!used[2 * i])
47525 l[i] = NULL_RTX;
47526 continue;
47528 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
47529 vperm = force_reg (V32QImode, vperm);
47530 l[i] = gen_reg_rtx (V32QImode);
47531 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
47532 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
47535 for (i = 0; i < 2; ++i)
47537 if (h[i] && l[i])
47539 op = gen_reg_rtx (V32QImode);
47540 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
47541 l[i] = op;
47543 else if (h[i])
47544 l[i] = h[i];
47547 gcc_assert (l[0] && l[1]);
47548 op = d->target;
47549 if (d->vmode != V32QImode)
47550 op = gen_reg_rtx (V32QImode);
47551 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
47552 if (op != d->target)
47553 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
47554 return true;
47557 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
47558 With all of the interface bits taken care of, perform the expansion
47559 in D and return true on success. */
47561 static bool
47562 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
47564 /* Try a single instruction expansion. */
47565 if (expand_vec_perm_1 (d))
47566 return true;
47568 /* Try sequences of two instructions. */
47569 /* ix86_expand_vec_perm_vpermi2 is also called from
47570 ix86_expand_vec_perm, so it does not take D as a parameter;
47571 construct the needed selector here. */
47572 rtx vec[64];
47573 int i;
47574 for (i = 0; i < d->nelt; ++i)
47575 vec[i] = GEN_INT (d->perm[i]);
47576 rtx sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, vec));
47577 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, sel, d->op1))
47578 return true;
47580 if (expand_vec_perm_pshuflw_pshufhw (d))
47581 return true;
47583 if (expand_vec_perm_palignr (d))
47584 return true;
47586 if (expand_vec_perm_interleave2 (d))
47587 return true;
47589 if (expand_vec_perm_broadcast (d))
47590 return true;
47592 if (expand_vec_perm_vpermq_perm_1 (d))
47593 return true;
47595 if (expand_vec_perm_vperm2f128 (d))
47596 return true;
47598 if (expand_vec_perm_pblendv (d))
47599 return true;
47601 /* Try sequences of three instructions. */
47603 if (expand_vec_perm_2vperm2f128_vshuf (d))
47604 return true;
47606 if (expand_vec_perm_pshufb2 (d))
47607 return true;
47609 if (expand_vec_perm_interleave3 (d))
47610 return true;
47612 if (expand_vec_perm_vperm2f128_vblend (d))
47613 return true;
47615 /* Try sequences of four instructions. */
47617 if (expand_vec_perm_vpshufb2_vpermq (d))
47618 return true;
47620 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
47621 return true;
47623 /* ??? Look for narrow permutations whose element orderings would
47624 allow the promotion to a wider mode. */
47626 /* ??? Look for sequences of interleave or a wider permute that place
47627 the data into the correct lanes for a half-vector shuffle like
47628 pshuf[lh]w or vpermilps. */
47630 /* ??? Look for sequences of interleave that produce the desired results.
47631 The combinatorics of punpck[lh] get pretty ugly... */
47633 if (expand_vec_perm_even_odd (d))
47634 return true;
47636 /* Even longer sequences. */
47637 if (expand_vec_perm_vpshufb4_vpermq2 (d))
47638 return true;
47640 return false;
47643 /* If a permutation only uses one operand, make it clear. Returns true
47644 if the permutation references both operands. */
47646 static bool
47647 canonicalize_perm (struct expand_vec_perm_d *d)
47649 int i, which, nelt = d->nelt;
47651 for (i = which = 0; i < nelt; ++i)
47652 which |= (d->perm[i] < nelt ? 1 : 2);
47654 d->one_operand_p = true;
47655 switch (which)
47657 default:
47658 gcc_unreachable();
47660 case 3:
47661 if (!rtx_equal_p (d->op0, d->op1))
47663 d->one_operand_p = false;
47664 break;
47666 /* The elements of PERM do not suggest that only the first operand
47667 is used, but both operands are identical. Allow easier matching
47668 of the permutation by folding the permutation into the single
47669 input vector. */
47670 /* FALLTHRU */
47672 case 2:
47673 for (i = 0; i < nelt; ++i)
47674 d->perm[i] &= nelt - 1;
47675 d->op0 = d->op1;
47676 break;
47678 case 1:
47679 d->op1 = d->op0;
47680 break;
47683 return (which == 3);
47686 bool
47687 ix86_expand_vec_perm_const (rtx operands[4])
47689 struct expand_vec_perm_d d;
47690 unsigned char perm[MAX_VECT_LEN];
47691 int i, nelt;
47692 bool two_args;
47693 rtx sel;
47695 d.target = operands[0];
47696 d.op0 = operands[1];
47697 d.op1 = operands[2];
47698 sel = operands[3];
47700 d.vmode = GET_MODE (d.target);
47701 gcc_assert (VECTOR_MODE_P (d.vmode));
47702 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47703 d.testing_p = false;
47705 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
47706 gcc_assert (XVECLEN (sel, 0) == nelt);
47707 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
47709 for (i = 0; i < nelt; ++i)
47711 rtx e = XVECEXP (sel, 0, i);
47712 int ei = INTVAL (e) & (2 * nelt - 1);
47713 d.perm[i] = ei;
47714 perm[i] = ei;
47717 two_args = canonicalize_perm (&d);
47719 if (ix86_expand_vec_perm_const_1 (&d))
47720 return true;
47722 /* If the selector says both arguments are needed, but the operands are the
47723 same, the above tried to expand with one_operand_p and flattened selector.
47724 If that didn't work, retry without one_operand_p; we succeeded with that
47725 during testing. */
47726 if (two_args && d.one_operand_p)
47728 d.one_operand_p = false;
47729 memcpy (d.perm, perm, sizeof (perm));
47730 return ix86_expand_vec_perm_const_1 (&d);
47733 return false;
47736 /* Implement targetm.vectorize.vec_perm_const_ok. */
47738 static bool
47739 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
47740 const unsigned char *sel)
47742 struct expand_vec_perm_d d;
47743 unsigned int i, nelt, which;
47744 bool ret;
47746 d.vmode = vmode;
47747 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47748 d.testing_p = true;
47750 /* Given sufficient ISA support we can just return true here
47751 for selected vector modes. */
47752 if (d.vmode == V16SImode || d.vmode == V16SFmode
47753 || d.vmode == V8DFmode || d.vmode == V8DImode
47754 || d.vmode == V32HImode || d.vmode == V64QImode)
47755 /* All implementable with a single vpermi2 insn. */
47756 return true;
47757 if (GET_MODE_SIZE (d.vmode) == 16)
47759 /* All implementable with a single vpperm insn. */
47760 if (TARGET_XOP)
47761 return true;
47762 /* All implementable with 2 pshufb + 1 ior. */
47763 if (TARGET_SSSE3)
47764 return true;
47765 /* All implementable with shufpd or unpck[lh]pd. */
47766 if (d.nelt == 2)
47767 return true;
47770 /* Extract the values from the vector CST into the permutation
47771 array in D. */
47772 memcpy (d.perm, sel, nelt);
47773 for (i = which = 0; i < nelt; ++i)
47775 unsigned char e = d.perm[i];
47776 gcc_assert (e < 2 * nelt);
47777 which |= (e < nelt ? 1 : 2);
47780 /* If all elements are from the second vector, fold them into the first. */
47781 if (which == 2)
47782 for (i = 0; i < nelt; ++i)
47783 d.perm[i] -= nelt;
47785 /* Check whether the mask can be applied to the vector type. */
47786 d.one_operand_p = (which != 3);
47788 /* Implementable with shufps or pshufd. */
47789 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
47790 return true;
47792 /* Otherwise we have to go through the motions and see if we can
47793 figure out how to generate the requested permutation. */
47794 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
47795 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
47796 if (!d.one_operand_p)
47797 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
47799 start_sequence ();
47800 ret = ix86_expand_vec_perm_const_1 (&d);
47801 end_sequence ();
47803 return ret;
47806 void
47807 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
47809 struct expand_vec_perm_d d;
47810 unsigned i, nelt;
47812 d.target = targ;
47813 d.op0 = op0;
47814 d.op1 = op1;
47815 d.vmode = GET_MODE (targ);
47816 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47817 d.one_operand_p = false;
47818 d.testing_p = false;
47820 for (i = 0; i < nelt; ++i)
47821 d.perm[i] = i * 2 + odd;
47823 /* We'll either be able to implement the permutation directly... */
47824 if (expand_vec_perm_1 (&d))
47825 return;
47827 /* ... or we use the special-case patterns. */
47828 expand_vec_perm_even_odd_1 (&d, odd);
47831 static void
47832 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
47834 struct expand_vec_perm_d d;
47835 unsigned i, nelt, base;
47836 bool ok;
47838 d.target = targ;
47839 d.op0 = op0;
47840 d.op1 = op1;
47841 d.vmode = GET_MODE (targ);
47842 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
47843 d.one_operand_p = false;
47844 d.testing_p = false;
47846 base = high_p ? nelt / 2 : 0;
47847 for (i = 0; i < nelt / 2; ++i)
47849 d.perm[i * 2] = i + base;
47850 d.perm[i * 2 + 1] = i + base + nelt;
47853 /* Note that for AVX this isn't one instruction. */
47854 ok = ix86_expand_vec_perm_const_1 (&d);
47855 gcc_assert (ok);
47859 /* Expand a vector operation CODE for a V*QImode in terms of the
47860 same operation on V*HImode. */
47862 void
47863 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
47865 enum machine_mode qimode = GET_MODE (dest);
47866 enum machine_mode himode;
47867 rtx (*gen_il) (rtx, rtx, rtx);
47868 rtx (*gen_ih) (rtx, rtx, rtx);
47869 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
47870 struct expand_vec_perm_d d;
47871 bool ok, full_interleave;
47872 bool uns_p = false;
47873 int i;
47875 switch (qimode)
47877 case V16QImode:
47878 himode = V8HImode;
47879 gen_il = gen_vec_interleave_lowv16qi;
47880 gen_ih = gen_vec_interleave_highv16qi;
47881 break;
47882 case V32QImode:
47883 himode = V16HImode;
47884 gen_il = gen_avx2_interleave_lowv32qi;
47885 gen_ih = gen_avx2_interleave_highv32qi;
47886 break;
47887 case V64QImode:
47888 himode = V32HImode;
47889 gen_il = gen_avx512bw_interleave_lowv64qi;
47890 gen_ih = gen_avx512bw_interleave_highv64qi;
47891 break;
47892 default:
47893 gcc_unreachable ();
47896 op2_l = op2_h = op2;
47897 switch (code)
47899 case MULT:
47900 /* Unpack data such that we've got a source byte in each low byte of
47901 each word. We don't care what goes into the high byte of each word.
47902 Rather than trying to get zero in there, most convenient is to let
47903 it be a copy of the low byte. */
47904 op2_l = gen_reg_rtx (qimode);
47905 op2_h = gen_reg_rtx (qimode);
47906 emit_insn (gen_il (op2_l, op2, op2));
47907 emit_insn (gen_ih (op2_h, op2, op2));
47908 /* FALLTHRU */
47910 op1_l = gen_reg_rtx (qimode);
47911 op1_h = gen_reg_rtx (qimode);
47912 emit_insn (gen_il (op1_l, op1, op1));
47913 emit_insn (gen_ih (op1_h, op1, op1));
47914 full_interleave = qimode == V16QImode;
47915 break;
47917 case ASHIFT:
47918 case LSHIFTRT:
47919 uns_p = true;
47920 /* FALLTHRU */
47921 case ASHIFTRT:
47922 op1_l = gen_reg_rtx (himode);
47923 op1_h = gen_reg_rtx (himode);
47924 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
47925 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
47926 full_interleave = true;
47927 break;
47928 default:
47929 gcc_unreachable ();
47932 /* Perform the operation. */
47933 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
47934 1, OPTAB_DIRECT);
47935 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
47936 1, OPTAB_DIRECT);
47937 gcc_assert (res_l && res_h);
47939 /* Merge the data back into the right place. */
47940 d.target = dest;
47941 d.op0 = gen_lowpart (qimode, res_l);
47942 d.op1 = gen_lowpart (qimode, res_h);
47943 d.vmode = qimode;
47944 d.nelt = GET_MODE_NUNITS (qimode);
47945 d.one_operand_p = false;
47946 d.testing_p = false;
47948 if (full_interleave)
47950 /* For SSE2, we used a full interleave, so the desired
47951 results are in the even elements. */
47952 for (i = 0; i < 64; ++i)
47953 d.perm[i] = i * 2;
47955 else
47957 /* For AVX, the interleave used above was not cross-lane. So the
47958 extraction takes the even elements, but with the second and third
47959 quarters swapped. Happily, that is even one insn shorter than plain even extraction. */
47960 for (i = 0; i < 64; ++i)
47961 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
47964 ok = ix86_expand_vec_perm_const_1 (&d);
47965 gcc_assert (ok);
47967 set_unique_reg_note (get_last_insn (), REG_EQUAL,
47968 gen_rtx_fmt_ee (code, qimode, op1, op2));
47971 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
47972 if op is CONST_VECTOR with all odd elements equal to their
47973 preceding element. */
47975 static bool
47976 const_vector_equal_evenodd_p (rtx op)
47978 enum machine_mode mode = GET_MODE (op);
47979 int i, nunits = GET_MODE_NUNITS (mode);
47980 if (GET_CODE (op) != CONST_VECTOR
47981 || nunits != CONST_VECTOR_NUNITS (op))
47982 return false;
47983 for (i = 0; i < nunits; i += 2)
47984 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
47985 return false;
47986 return true;
47989 void
47990 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
47991 bool uns_p, bool odd_p)
47993 enum machine_mode mode = GET_MODE (op1);
47994 enum machine_mode wmode = GET_MODE (dest);
47995 rtx x;
47996 rtx orig_op1 = op1, orig_op2 = op2;
47998 if (!nonimmediate_operand (op1, mode))
47999 op1 = force_reg (mode, op1);
48000 if (!nonimmediate_operand (op2, mode))
48001 op2 = force_reg (mode, op2);
48003 /* We only play even/odd games with vectors of SImode. */
48004 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
48006 /* If we're looking for the odd results, shift those members down to
48007 the even slots. For some cpus this is faster than a PSHUFD. */
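/* Viewed in WMODE (e.g. V2DImode for V4SImode inputs), a logical
   right shift by the 32-bit unit size moves each odd SImode element
   into the even slot of its wider element.  */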
48008 if (odd_p)
48010 /* For XOP use vpmacsdqh, but only for smult, as it is only
48011 signed. */
48012 if (TARGET_XOP && mode == V4SImode && !uns_p)
48014 x = force_reg (wmode, CONST0_RTX (wmode));
48015 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
48016 return;
48019 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
48020 if (!const_vector_equal_evenodd_p (orig_op1))
48021 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
48022 x, NULL, 1, OPTAB_DIRECT);
48023 if (!const_vector_equal_evenodd_p (orig_op2))
48024 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
48025 x, NULL, 1, OPTAB_DIRECT);
48026 op1 = gen_lowpart (mode, op1);
48027 op2 = gen_lowpart (mode, op2);
48030 if (mode == V16SImode)
48032 if (uns_p)
48033 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
48034 else
48035 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
48037 else if (mode == V8SImode)
48039 if (uns_p)
48040 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
48041 else
48042 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
48044 else if (uns_p)
48045 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
48046 else if (TARGET_SSE4_1)
48047 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
48048 else
48050 rtx s1, s2, t0, t1, t2;
48052 /* The easiest way to implement this without PMULDQ is to go through
48053 the motions as if we are performing a full 64-bit multiply. With
48054 the exception that we need to do less shuffling of the elements. */
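/* In effect this uses the identity
   sext (a) * sext (b)
     == zext (a) * zext (b) - 2^32 * (b * [a < 0] + a * [b < 0])
   modulo 2^64, where [x < 0] is 1 if x is negative and 0 otherwise;
   S1 and S2 below are the all-ones masks supplying the correction
   terms that are later shifted into the high half and added.  */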
48056 /* Compute the sign-extension, aka highparts, of the two operands. */
48057 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
48058 op1, pc_rtx, pc_rtx);
48059 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
48060 op2, pc_rtx, pc_rtx);
48062 /* Multiply LO(A) * HI(B), and vice-versa. */
48063 t1 = gen_reg_rtx (wmode);
48064 t2 = gen_reg_rtx (wmode);
48065 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
48066 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
48068 /* Multiply LO(A) * LO(B). */
48069 t0 = gen_reg_rtx (wmode);
48070 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
48072 /* Combine and shift the highparts into place. */
48073 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
48074 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
48075 1, OPTAB_DIRECT);
48077 /* Combine high and low parts. */
48078 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
48079 return;
48081 emit_insn (x);
48084 void
48085 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
48086 bool uns_p, bool high_p)
48088 enum machine_mode wmode = GET_MODE (dest);
48089 enum machine_mode mode = GET_MODE (op1);
48090 rtx t1, t2, t3, t4, mask;
48092 switch (mode)
48094 case V4SImode:
48095 t1 = gen_reg_rtx (mode);
48096 t2 = gen_reg_rtx (mode);
48097 if (TARGET_XOP && !uns_p)
48099 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
48100 shuffle the elements once so that all elements are in the right
48101 place for immediate use: { A C B D }. */
48102 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
48103 const1_rtx, GEN_INT (3)));
48104 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
48105 const1_rtx, GEN_INT (3)));
48107 else
48109 /* Put the elements into place for the multiply. */
48110 ix86_expand_vec_interleave (t1, op1, op1, high_p);
48111 ix86_expand_vec_interleave (t2, op2, op2, high_p);
48112 high_p = false;
48114 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
48115 break;
48117 case V8SImode:
48118 /* Shuffle the elements between the lanes. After this we
48119 have { A B E F | C D G H } for each operand. */
48120 t1 = gen_reg_rtx (V4DImode);
48121 t2 = gen_reg_rtx (V4DImode);
48122 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
48123 const0_rtx, const2_rtx,
48124 const1_rtx, GEN_INT (3)));
48125 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
48126 const0_rtx, const2_rtx,
48127 const1_rtx, GEN_INT (3)));
48129 /* Shuffle the elements within the lanes. After this we
48130 have { A A B B | C C D D } or { E E F F | G G H H }. */
48131 t3 = gen_reg_rtx (V8SImode);
48132 t4 = gen_reg_rtx (V8SImode);
48133 mask = GEN_INT (high_p
48134 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
48135 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
48136 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
48137 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
48139 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
48140 break;
48142 case V8HImode:
48143 case V16HImode:
48144 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
48145 uns_p, OPTAB_DIRECT);
48146 t2 = expand_binop (mode,
48147 uns_p ? umul_highpart_optab : smul_highpart_optab,
48148 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
48149 gcc_assert (t1 && t2);
48151 t3 = gen_reg_rtx (mode);
48152 ix86_expand_vec_interleave (t3, t1, t2, high_p);
48153 emit_move_insn (dest, gen_lowpart (wmode, t3));
48154 break;
48156 case V16QImode:
48157 case V32QImode:
48158 /* TODO: why is V32HImode handled here and not in the HImode case above? */
48159 case V32HImode:
48160 case V16SImode:
48161 case V64QImode:
48162 t1 = gen_reg_rtx (wmode);
48163 t2 = gen_reg_rtx (wmode);
48164 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
48165 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
48167 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
48168 break;
48170 default:
48171 gcc_unreachable ();
48175 void
48176 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
48178 rtx res_1, res_2, res_3, res_4;
48180 res_1 = gen_reg_rtx (V4SImode);
48181 res_2 = gen_reg_rtx (V4SImode);
48182 res_3 = gen_reg_rtx (V2DImode);
48183 res_4 = gen_reg_rtx (V2DImode);
48184 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
48185 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
48187 /* Move the results in element 2 down to element 1; we don't care
48188 what goes in elements 2 and 3. Then we can merge the parts
48189 back together with an interleave.
48191 Note that two other sequences were tried:
48192 (1) Use interleaves at the start instead of psrldq, which allows
48193 us to use a single shufps to merge things back at the end.
48194 (2) Use shufps here to combine the two vectors, then pshufd to
48195 put the elements in the correct order.
48196 In both cases the cost of the reformatting stall was too high
48197 and the overall sequence slower. */
48199 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
48200 const0_rtx, const2_rtx,
48201 const0_rtx, const0_rtx));
48202 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
48203 const0_rtx, const2_rtx,
48204 const0_rtx, const0_rtx));
48205 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
48207 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
48210 void
48211 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
48213 enum machine_mode mode = GET_MODE (op0);
48214 rtx t1, t2, t3, t4, t5, t6;
48216 if (TARGET_AVX512DQ && mode == V8DImode)
48217 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
48218 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
48219 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
48220 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
48221 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
48222 else if (TARGET_XOP && mode == V2DImode)
48224 /* op1: A,B,C,D, op2: E,F,G,H */
48225 op1 = gen_lowpart (V4SImode, op1);
48226 op2 = gen_lowpart (V4SImode, op2);
48228 t1 = gen_reg_rtx (V4SImode);
48229 t2 = gen_reg_rtx (V4SImode);
48230 t3 = gen_reg_rtx (V2DImode);
48231 t4 = gen_reg_rtx (V2DImode);
48233 /* t1: B,A,D,C */
48234 emit_insn (gen_sse2_pshufd_1 (t1, op1,
48235 GEN_INT (1),
48236 GEN_INT (0),
48237 GEN_INT (3),
48238 GEN_INT (2)));
48240 /* t2: (B*E),(A*F),(D*G),(C*H) */
48241 emit_insn (gen_mulv4si3 (t2, t1, op2));
48243 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
48244 emit_insn (gen_xop_phadddq (t3, t2));
48246 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
48247 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
48249 /* Multiply lower parts and add all */
48250 t5 = gen_reg_rtx (V2DImode);
48251 emit_insn (gen_vec_widen_umult_even_v4si (t5,
48252 gen_lowpart (V4SImode, op1),
48253 gen_lowpart (V4SImode, op2)));
48254 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
48257 else
48259 enum machine_mode nmode;
48260 rtx (*umul) (rtx, rtx, rtx);
48262 if (mode == V2DImode)
48264 umul = gen_vec_widen_umult_even_v4si;
48265 nmode = V4SImode;
48267 else if (mode == V4DImode)
48269 umul = gen_vec_widen_umult_even_v8si;
48270 nmode = V8SImode;
48272 else if (mode == V8DImode)
48274 umul = gen_vec_widen_umult_even_v16si;
48275 nmode = V16SImode;
48277 else
48278 gcc_unreachable ();
48281 /* Multiply low parts. */
48282 t1 = gen_reg_rtx (mode);
48283 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
48285 /* Shift input vectors right 32 bits so we can multiply high parts. */
48286 t6 = GEN_INT (32);
48287 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
48288 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
48290 /* Multiply high parts by low parts. */
48291 t4 = gen_reg_rtx (mode);
48292 t5 = gen_reg_rtx (mode);
48293 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
48294 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
48296 /* Combine and shift the highparts back. */
48297 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
48298 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
48300 /* Combine high and low parts. */
48301 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
48304 set_unique_reg_note (get_last_insn (), REG_EQUAL,
48305 gen_rtx_MULT (mode, op1, op2));
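/* Illustrative sketch, not part of i386.c (the helper name is made up):
   the per-lane identity used by the generic path above, assuming unsigned
   64-bit lanes.  Only the low 64 bits of the full product are kept, so the
   hi(a)*hi(b) term can be dropped.  */
#include <stdint.h>

static uint64_t
mul64_from_halves (uint64_t a, uint64_t b)
{
  uint64_t lo = (uint64_t) (uint32_t) a * (uint32_t) b;   /* lo(a)*lo(b) */
  uint64_t cross = (a >> 32) * (uint32_t) b               /* hi(a)*lo(b) */
                   + (uint32_t) a * (b >> 32);            /* lo(a)*hi(b) */
  return lo + (cross << 32);
}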
48308 /* Calculate integer abs() using only SSE2 instructions. */
48310 void
48311 ix86_expand_sse2_abs (rtx target, rtx input)
48313 enum machine_mode mode = GET_MODE (target);
48314 rtx tmp0, tmp1, x;
48316 switch (mode)
48318 /* For 32-bit signed integer X, the best way to calculate the absolute
48319 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
48320 case V4SImode:
48321 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
48322 GEN_INT (GET_MODE_BITSIZE
48323 (GET_MODE_INNER (mode)) - 1),
48324 NULL, 0, OPTAB_DIRECT);
48325 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
48326 NULL, 0, OPTAB_DIRECT);
48327 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
48328 target, 0, OPTAB_DIRECT);
48329 break;
48331 /* For 16-bit signed integer X, the best way to calculate the absolute
48332 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
48333 case V8HImode:
48334 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
48336 x = expand_simple_binop (mode, SMAX, tmp0, input,
48337 target, 0, OPTAB_DIRECT);
48338 break;
48340 /* For 8-bit signed integer X, the best way to calculate the absolute
48341 value of X is min ((unsigned char) X, (unsigned char) (-X)),
48342 as SSE2 provides the PMINUB insn. */
48343 case V16QImode:
48344 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
48346 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
48347 target, 0, OPTAB_DIRECT);
48348 break;
48350 default:
48351 gcc_unreachable ();
48354 if (x != target)
48355 emit_move_insn (target, x);
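/* Illustrative sketch, not part of i386.c (helper names are made up):
   scalar equivalents of the three lane-wise abs() tricks used above.
   As with the vector instructions, the most-negative input wraps.  */
#include <stdint.h>

static int32_t
abs_v4si_lane (int32_t x)
{
  int32_t s = x >> 31;               /* sign mask: 0 or -1 */
  return (x ^ s) - s;
}

static int16_t
abs_v8hi_lane (int16_t x)
{
  int16_t n = (int16_t) -x;
  return x > n ? x : n;              /* max (x, -x), as PMAXSW does */
}

static uint8_t
abs_v16qi_lane (int8_t x)
{
  uint8_t a = (uint8_t) x, b = (uint8_t) -x;
  return a < b ? a : b;              /* min ((u8) x, (u8) -x), as PMINUB does */
}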
48358 /* Expand an insert into a vector register through pinsr insn.
48359 Return true if successful. */
48361 bool
48362 ix86_expand_pinsr (rtx *operands)
48364 rtx dst = operands[0];
48365 rtx src = operands[3];
48367 unsigned int size = INTVAL (operands[1]);
48368 unsigned int pos = INTVAL (operands[2]);
48370 if (GET_CODE (dst) == SUBREG)
48372 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
48373 dst = SUBREG_REG (dst);
48376 if (GET_CODE (src) == SUBREG)
48377 src = SUBREG_REG (src);
48379 switch (GET_MODE (dst))
48381 case V16QImode:
48382 case V8HImode:
48383 case V4SImode:
48384 case V2DImode:
48386 enum machine_mode srcmode, dstmode;
48387 rtx (*pinsr)(rtx, rtx, rtx, rtx);
48389 srcmode = mode_for_size (size, MODE_INT, 0);
48391 switch (srcmode)
48393 case QImode:
48394 if (!TARGET_SSE4_1)
48395 return false;
48396 dstmode = V16QImode;
48397 pinsr = gen_sse4_1_pinsrb;
48398 break;
48400 case HImode:
48401 if (!TARGET_SSE2)
48402 return false;
48403 dstmode = V8HImode;
48404 pinsr = gen_sse2_pinsrw;
48405 break;
48407 case SImode:
48408 if (!TARGET_SSE4_1)
48409 return false;
48410 dstmode = V4SImode;
48411 pinsr = gen_sse4_1_pinsrd;
48412 break;
48414 case DImode:
48415 gcc_assert (TARGET_64BIT);
48416 if (!TARGET_SSE4_1)
48417 return false;
48418 dstmode = V2DImode;
48419 pinsr = gen_sse4_1_pinsrq;
48420 break;
48422 default:
48423 return false;
48426 rtx d = dst;
48427 if (GET_MODE (dst) != dstmode)
48428 d = gen_reg_rtx (dstmode);
48429 src = gen_lowpart (srcmode, src);
48431 pos /= size;
48433 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
48434 GEN_INT (1 << pos)));
48435 if (d != dst)
48436 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
48437 return true;
48440 default:
48441 return false;
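/* Illustrative sketch, not part of i386.c (the helper name is made up):
   at the source level, the pinsr insn emitted above corresponds to
   something like the SSE2 intrinsic below, e.g. inserting a 16-bit value
   at bit position 48 of a V8HImode register (pos /= size gives lane 3;
   the RTL pattern receives the mask 1 << 3).  */
#include <emmintrin.h>

static __m128i
insert_hi_lane3 (__m128i v, int val)
{
  return _mm_insert_epi16 (v, val, 3);   /* pinsrw $3, val, v */
}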
48445 /* This function returns the calling abi specific va_list type node.
48446 It returns the FNDECL specific va_list type. */
48448 static tree
48449 ix86_fn_abi_va_list (tree fndecl)
48451 if (!TARGET_64BIT)
48452 return va_list_type_node;
48453 gcc_assert (fndecl != NULL_TREE);
48455 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
48456 return ms_va_list_type_node;
48457 else
48458 return sysv_va_list_type_node;
48461 /* Returns the canonical va_list type specified by TYPE. If there
48462 is no valid TYPE provided, it returns NULL_TREE. */
48464 static tree
48465 ix86_canonical_va_list_type (tree type)
48467 tree wtype, htype;
48469 /* Resolve references and pointers to va_list type. */
48470 if (TREE_CODE (type) == MEM_REF)
48471 type = TREE_TYPE (type);
48472 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
48473 type = TREE_TYPE (type);
48474 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
48475 type = TREE_TYPE (type);
48477 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
48479 wtype = va_list_type_node;
48480 gcc_assert (wtype != NULL_TREE);
48481 htype = type;
48482 if (TREE_CODE (wtype) == ARRAY_TYPE)
48484 /* If va_list is an array type, the argument may have decayed
48485 to a pointer type, e.g. by being passed to another function.
48486 In that case, unwrap both types so that we can compare the
48487 underlying records. */
48488 if (TREE_CODE (htype) == ARRAY_TYPE
48489 || POINTER_TYPE_P (htype))
48491 wtype = TREE_TYPE (wtype);
48492 htype = TREE_TYPE (htype);
48495 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
48496 return va_list_type_node;
48497 wtype = sysv_va_list_type_node;
48498 gcc_assert (wtype != NULL_TREE);
48499 htype = type;
48500 if (TREE_CODE (wtype) == ARRAY_TYPE)
48502 /* If va_list is an array type, the argument may have decayed
48503 to a pointer type, e.g. by being passed to another function.
48504 In that case, unwrap both types so that we can compare the
48505 underlying records. */
48506 if (TREE_CODE (htype) == ARRAY_TYPE
48507 || POINTER_TYPE_P (htype))
48509 wtype = TREE_TYPE (wtype);
48510 htype = TREE_TYPE (htype);
48513 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
48514 return sysv_va_list_type_node;
48515 wtype = ms_va_list_type_node;
48516 gcc_assert (wtype != NULL_TREE);
48517 htype = type;
48518 if (TREE_CODE (wtype) == ARRAY_TYPE)
48520 /* If va_list is an array type, the argument may have decayed
48521 to a pointer type, e.g. by being passed to another function.
48522 In that case, unwrap both types so that we can compare the
48523 underlying records. */
48524 if (TREE_CODE (htype) == ARRAY_TYPE
48525 || POINTER_TYPE_P (htype))
48527 wtype = TREE_TYPE (wtype);
48528 htype = TREE_TYPE (htype);
48531 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
48532 return ms_va_list_type_node;
48533 return NULL_TREE;
48535 return std_canonical_va_list_type (type);
48538 /* Iterate through the target-specific builtin types for va_list.
48539 IDX denotes the iterator, *PTREE is set to the result type of
48540 the va_list builtin, and *PNAME to its internal type.
48541 Returns zero if there is no element for this index, otherwise
48542 IDX should be increased upon the next call.
48543 Note, do not iterate a base builtin's name like __builtin_va_list.
48544 Used from c_common_nodes_and_builtins. */
48546 static int
48547 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
48549 if (TARGET_64BIT)
48551 switch (idx)
48553 default:
48554 break;
48556 case 0:
48557 *ptree = ms_va_list_type_node;
48558 *pname = "__builtin_ms_va_list";
48559 return 1;
48561 case 1:
48562 *ptree = sysv_va_list_type_node;
48563 *pname = "__builtin_sysv_va_list";
48564 return 1;
48568 return 0;
48571 #undef TARGET_SCHED_DISPATCH
48572 #define TARGET_SCHED_DISPATCH has_dispatch
48573 #undef TARGET_SCHED_DISPATCH_DO
48574 #define TARGET_SCHED_DISPATCH_DO do_dispatch
48575 #undef TARGET_SCHED_REASSOCIATION_WIDTH
48576 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
48577 #undef TARGET_SCHED_REORDER
48578 #define TARGET_SCHED_REORDER ix86_sched_reorder
48579 #undef TARGET_SCHED_ADJUST_PRIORITY
48580 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
48581 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
48582 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
48583 ix86_dependencies_evaluation_hook
48585 /* The size of the dispatch window is the total number of bytes of
48586 object code allowed in a window. */
48587 #define DISPATCH_WINDOW_SIZE 16
48589 /* Number of dispatch windows considered for scheduling. */
48590 #define MAX_DISPATCH_WINDOWS 3
48592 /* Maximum number of instructions in a window. */
48593 #define MAX_INSN 4
48595 /* Maximum number of immediate operands in a window. */
48596 #define MAX_IMM 4
48598 /* Maximum number of immediate bits allowed in a window. */
48599 #define MAX_IMM_SIZE 128
48601 /* Maximum number of 32 bit immediates allowed in a window. */
48602 #define MAX_IMM_32 4
48604 /* Maximum number of 64 bit immediates allowed in a window. */
48605 #define MAX_IMM_64 2
48607 /* Maximum total of loads or prefetches allowed in a window. */
48608 #define MAX_LOAD 2
48610 /* Maximum total of stores allowed in a window. */
48611 #define MAX_STORE 1
48613 #undef BIG
48614 #define BIG 100
48617 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
48618 enum dispatch_group {
48619 disp_no_group = 0,
48620 disp_load,
48621 disp_store,
48622 disp_load_store,
48623 disp_prefetch,
48624 disp_imm,
48625 disp_imm_32,
48626 disp_imm_64,
48627 disp_branch,
48628 disp_cmp,
48629 disp_jcc,
48630 disp_last
48633 /* Number of allowable groups in a dispatch window. It is an array
48634 indexed by dispatch_group enum. 100 is used as a big number,
48635 because the number of these kinds of operations does not have any
48636 effect in the dispatch window, but we need them for other reasons in
48637 the table. */
48638 static unsigned int num_allowable_groups[disp_last] = {
48639 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
48642 char group_name[disp_last + 1][16] = {
48643 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
48644 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
48645 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
48648 /* Instruction path. */
48649 enum insn_path {
48650 no_path = 0,
48651 path_single, /* Single micro op. */
48652 path_double, /* Double micro op. */
48653 path_multi, /* Instructions with more than 2 micro ops. */
48654 last_path
48657 /* sched_insn_info defines a window to the instructions scheduled in
48658 the basic block. It contains a pointer to the insn_info table and
48659 the instruction scheduled.
48661 Windows are allocated for each basic block and are linked
48662 together. */
48663 typedef struct sched_insn_info_s {
48664 rtx insn;
48665 enum dispatch_group group;
48666 enum insn_path path;
48667 int byte_len;
48668 int imm_bytes;
48669 } sched_insn_info;
48671 /* Linked list of dispatch windows. This is a two way list of
48672 dispatch windows of a basic block. It contains information about
48673 the number of uops in the window and the total number of
48674 instructions and of bytes in the object code for this dispatch
48675 window. */
48676 typedef struct dispatch_windows_s {
48677 int num_insn; /* Number of insn in the window. */
48678 int num_uops; /* Number of uops in the window. */
48679 int window_size; /* Number of bytes in the window. */
48680 int window_num; /* Window number, either 0 or 1. */
48681 int num_imm; /* Number of immediates in an insn. */
48682 int num_imm_32; /* Number of 32 bit immediates in an insn. */
48683 int num_imm_64; /* Number of 64 bit immediates in an insn. */
48684 int imm_size; /* Total immediates in the window. */
48685 int num_loads; /* Total memory loads in the window. */
48686 int num_stores; /* Total memory stores in the window. */
48687 int violation; /* Violation exists in window. */
48688 sched_insn_info *window; /* Pointer to the window. */
48689 struct dispatch_windows_s *next;
48690 struct dispatch_windows_s *prev;
48691 } dispatch_windows;
48693 /* Immediate values used in an insn. */
48694 typedef struct imm_info_s
48696 int imm;
48697 int imm32;
48698 int imm64;
48699 } imm_info;
48701 static dispatch_windows *dispatch_window_list;
48702 static dispatch_windows *dispatch_window_list1;
48704 /* Get dispatch group of insn. */
48706 static enum dispatch_group
48707 get_mem_group (rtx insn)
48709 enum attr_memory memory;
48711 if (INSN_CODE (insn) < 0)
48712 return disp_no_group;
48713 memory = get_attr_memory (insn);
48714 if (memory == MEMORY_STORE)
48715 return disp_store;
48717 if (memory == MEMORY_LOAD)
48718 return disp_load;
48720 if (memory == MEMORY_BOTH)
48721 return disp_load_store;
48723 return disp_no_group;
48726 /* Return true if insn is a compare instruction. */
48728 static bool
48729 is_cmp (rtx insn)
48731 enum attr_type type;
48733 type = get_attr_type (insn);
48734 return (type == TYPE_TEST
48735 || type == TYPE_ICMP
48736 || type == TYPE_FCMP
48737 || GET_CODE (PATTERN (insn)) == COMPARE);
48740 /* Return true if a dispatch violation was encountered. */
48742 static bool
48743 dispatch_violation (void)
48745 if (dispatch_window_list->next)
48746 return dispatch_window_list->next->violation;
48747 return dispatch_window_list->violation;
48750 /* Return true if insn is a branch instruction. */
48752 static bool
48753 is_branch (rtx insn)
48755 return (CALL_P (insn) || JUMP_P (insn));
48758 /* Return true if insn is a prefetch instruction. */
48760 static bool
48761 is_prefetch (rtx insn)
48763 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
48766 /* This function initializes a dispatch window and the list container holding a
48767 pointer to the window. */
48769 static void
48770 init_window (int window_num)
48772 int i;
48773 dispatch_windows *new_list;
48775 if (window_num == 0)
48776 new_list = dispatch_window_list;
48777 else
48778 new_list = dispatch_window_list1;
48780 new_list->num_insn = 0;
48781 new_list->num_uops = 0;
48782 new_list->window_size = 0;
48783 new_list->next = NULL;
48784 new_list->prev = NULL;
48785 new_list->window_num = window_num;
48786 new_list->num_imm = 0;
48787 new_list->num_imm_32 = 0;
48788 new_list->num_imm_64 = 0;
48789 new_list->imm_size = 0;
48790 new_list->num_loads = 0;
48791 new_list->num_stores = 0;
48792 new_list->violation = false;
48794 for (i = 0; i < MAX_INSN; i++)
48796 new_list->window[i].insn = NULL;
48797 new_list->window[i].group = disp_no_group;
48798 new_list->window[i].path = no_path;
48799 new_list->window[i].byte_len = 0;
48800 new_list->window[i].imm_bytes = 0;
48802 return;
48805 /* This function allocates and initializes a dispatch window and the
48806 list container holding a pointer to the window. */
48808 static dispatch_windows *
48809 allocate_window (void)
48811 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
48812 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
48814 return new_list;
48817 /* This routine initializes the dispatch scheduling information. It
48818 initiates building dispatch scheduler tables and constructs the
48819 first dispatch window. */
48821 static void
48822 init_dispatch_sched (void)
48824 /* Allocate a dispatch list and a window. */
48825 dispatch_window_list = allocate_window ();
48826 dispatch_window_list1 = allocate_window ();
48827 init_window (0);
48828 init_window (1);
48831 /* This function returns true if a branch is detected. End of a basic block
48832 does not have to be a branch, but here we assume only branches end a
48833 window. */
48835 static bool
48836 is_end_basic_block (enum dispatch_group group)
48838 return group == disp_branch;
48841 /* This function is called when the end of a window processing is reached. */
48843 static void
48844 process_end_window (void)
48846 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
48847 if (dispatch_window_list->next)
48849 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
48850 gcc_assert (dispatch_window_list->window_size
48851 + dispatch_window_list1->window_size <= 48);
48852 init_window (1);
48854 init_window (0);
48857 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
48858 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
48859 for 48 bytes of instructions. Note that these windows are not dispatch
48860 windows whose sizes are DISPATCH_WINDOW_SIZE. */
48862 static dispatch_windows *
48863 allocate_next_window (int window_num)
48865 if (window_num == 0)
48867 if (dispatch_window_list->next)
48868 init_window (1);
48869 init_window (0);
48870 return dispatch_window_list;
48873 dispatch_window_list->next = dispatch_window_list1;
48874 dispatch_window_list1->prev = dispatch_window_list;
48876 return dispatch_window_list1;
48879 /* Increment the number of immediate operands of an instruction. */
48881 static int
48882 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
48884 if (*in_rtx == 0)
48885 return 0;
48887 switch ( GET_CODE (*in_rtx))
48889 case CONST:
48890 case SYMBOL_REF:
48891 case CONST_INT:
48892 (imm_values->imm)++;
48893 if (x86_64_immediate_operand (*in_rtx, SImode))
48894 (imm_values->imm32)++;
48895 else
48896 (imm_values->imm64)++;
48897 break;
48899 case CONST_DOUBLE:
48900 (imm_values->imm)++;
48901 (imm_values->imm64)++;
48902 break;
48904 case CODE_LABEL:
48905 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
48907 (imm_values->imm)++;
48908 (imm_values->imm32)++;
48910 break;
48912 default:
48913 break;
48916 return 0;
48919 /* Compute number of immediate operands of an instruction. */
48921 static void
48922 find_constant (rtx in_rtx, imm_info *imm_values)
48924 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
48925 (rtx_function) find_constant_1, (void *) imm_values);
48928 /* Return total size of immediate operands of an instruction along with number
48929 of corresponding immediate-operands. It initializes its parameters to zero
48930 before calling FIND_CONSTANT.
48931 INSN is the input instruction. IMM is the total of immediates.
48932 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
48933 bit immediates. */
48935 static int
48936 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
48938 imm_info imm_values = {0, 0, 0};
48940 find_constant (insn, &imm_values);
48941 *imm = imm_values.imm;
48942 *imm32 = imm_values.imm32;
48943 *imm64 = imm_values.imm64;
48944 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
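/* Worked example: an insn carrying one 32-bit and one 64-bit immediate
   reports *IMM = 2, *IMM32 = 1, *IMM64 = 1 and returns 1*4 + 1*8 = 12
   bytes, which is then charged against MAX_IMM_SIZE for the window.  */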
48947 /* This function indicates whether an instruction has any immediate
48948 operands. */
48950 static bool
48951 has_immediate (rtx insn)
48953 int num_imm_operand;
48954 int num_imm32_operand;
48955 int num_imm64_operand;
48957 if (insn)
48958 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
48959 &num_imm64_operand);
48960 return false;
48963 /* Return single or double path for instructions. */
48965 static enum insn_path
48966 get_insn_path (rtx insn)
48968 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
48970 if ((int)path == 0)
48971 return path_single;
48973 if ((int)path == 1)
48974 return path_double;
48976 return path_multi;
48979 /* Return insn dispatch group. */
48981 static enum dispatch_group
48982 get_insn_group (rtx insn)
48984 enum dispatch_group group = get_mem_group (insn);
48985 if (group)
48986 return group;
48988 if (is_branch (insn))
48989 return disp_branch;
48991 if (is_cmp (insn))
48992 return disp_cmp;
48994 if (has_immediate (insn))
48995 return disp_imm;
48997 if (is_prefetch (insn))
48998 return disp_prefetch;
49000 return disp_no_group;
49003 /* Count number of GROUP restricted instructions in a dispatch
49004 window WINDOW_LIST. */
49006 static int
49007 count_num_restricted (rtx insn, dispatch_windows *window_list)
49009 enum dispatch_group group = get_insn_group (insn);
49010 int imm_size;
49011 int num_imm_operand;
49012 int num_imm32_operand;
49013 int num_imm64_operand;
49015 if (group == disp_no_group)
49016 return 0;
49018 if (group == disp_imm)
49020 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
49021 &num_imm64_operand);
49022 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
49023 || num_imm_operand + window_list->num_imm > MAX_IMM
49024 || (num_imm32_operand > 0
49025 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
49026 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
49027 || (num_imm64_operand > 0
49028 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
49029 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
49030 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
49031 && num_imm64_operand > 0
49032 && ((window_list->num_imm_64 > 0
49033 && window_list->num_insn >= 2)
49034 || window_list->num_insn >= 3)))
49035 return BIG;
49037 return 1;
49040 if ((group == disp_load_store
49041 && (window_list->num_loads >= MAX_LOAD
49042 || window_list->num_stores >= MAX_STORE))
49043 || ((group == disp_load
49044 || group == disp_prefetch)
49045 && window_list->num_loads >= MAX_LOAD)
49046 || (group == disp_store
49047 && window_list->num_stores >= MAX_STORE))
49048 return BIG;
49050 return 1;
49053 /* This function returns true if insn satisfies dispatch rules on the
49054 last window scheduled. */
49056 static bool
49057 fits_dispatch_window (rtx insn)
49059 dispatch_windows *window_list = dispatch_window_list;
49060 dispatch_windows *window_list_next = dispatch_window_list->next;
49061 unsigned int num_restrict;
49062 enum dispatch_group group = get_insn_group (insn);
49063 enum insn_path path = get_insn_path (insn);
49064 int sum;
49066 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
49067 instructions should be given the lowest priority in the
49068 scheduling process in the Haifa scheduler to make sure they will be
49069 scheduled in the same dispatch window as the reference to them. */
49070 if (group == disp_jcc || group == disp_cmp)
49071 return false;
49073 /* Check nonrestricted. */
49074 if (group == disp_no_group || group == disp_branch)
49075 return true;
49077 /* Get last dispatch window. */
49078 if (window_list_next)
49079 window_list = window_list_next;
49081 if (window_list->window_num == 1)
49083 sum = window_list->prev->window_size + window_list->window_size;
49085 if (sum == 32
49086 || (min_insn_size (insn) + sum) >= 48)
49087 /* Window 1 is full. Go for next window. */
49088 return true;
49091 num_restrict = count_num_restricted (insn, window_list);
49093 if (num_restrict > num_allowable_groups[group])
49094 return false;
49096 /* See if it fits in the first window. */
49097 if (window_list->window_num == 0)
49099 /* The first window should have only single and double path
49100 uops. */
49101 if (path == path_double
49102 && (window_list->num_uops + 2) > MAX_INSN)
49103 return false;
49104 else if (path != path_single)
49105 return false;
49107 return true;
49110 /* Add an instruction INSN with NUM_UOPS micro-operations to the
49111 dispatch window WINDOW_LIST. */
49113 static void
49114 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
49116 int byte_len = min_insn_size (insn);
49117 int num_insn = window_list->num_insn;
49118 int imm_size;
49119 sched_insn_info *window = window_list->window;
49120 enum dispatch_group group = get_insn_group (insn);
49121 enum insn_path path = get_insn_path (insn);
49122 int num_imm_operand;
49123 int num_imm32_operand;
49124 int num_imm64_operand;
49126 if (!window_list->violation && group != disp_cmp
49127 && !fits_dispatch_window (insn))
49128 window_list->violation = true;
49130 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
49131 &num_imm64_operand);
49133 /* Initialize window with new instruction. */
49134 window[num_insn].insn = insn;
49135 window[num_insn].byte_len = byte_len;
49136 window[num_insn].group = group;
49137 window[num_insn].path = path;
49138 window[num_insn].imm_bytes = imm_size;
49140 window_list->window_size += byte_len;
49141 window_list->num_insn = num_insn + 1;
49142 window_list->num_uops = window_list->num_uops + num_uops;
49143 window_list->imm_size += imm_size;
49144 window_list->num_imm += num_imm_operand;
49145 window_list->num_imm_32 += num_imm32_operand;
49146 window_list->num_imm_64 += num_imm64_operand;
49148 if (group == disp_store)
49149 window_list->num_stores += 1;
49150 else if (group == disp_load
49151 || group == disp_prefetch)
49152 window_list->num_loads += 1;
49153 else if (group == disp_load_store)
49155 window_list->num_stores += 1;
49156 window_list->num_loads += 1;
49160 /* Adds a scheduled instruction, INSN, to the current dispatch window.
49161 If the total bytes of instructions or the number of instructions in
49162 the window exceed the allowable limits, it allocates a new window. */
49164 static void
49165 add_to_dispatch_window (rtx insn)
49167 int byte_len;
49168 dispatch_windows *window_list;
49169 dispatch_windows *next_list;
49170 dispatch_windows *window0_list;
49171 enum insn_path path;
49172 enum dispatch_group insn_group;
49173 bool insn_fits;
49174 int num_insn;
49175 int num_uops;
49176 int window_num;
49177 int insn_num_uops;
49178 int sum;
49180 if (INSN_CODE (insn) < 0)
49181 return;
49183 byte_len = min_insn_size (insn);
49184 window_list = dispatch_window_list;
49185 next_list = window_list->next;
49186 path = get_insn_path (insn);
49187 insn_group = get_insn_group (insn);
49189 /* Get the last dispatch window. */
49190 if (next_list)
49191 window_list = dispatch_window_list->next;
49193 if (path == path_single)
49194 insn_num_uops = 1;
49195 else if (path == path_double)
49196 insn_num_uops = 2;
49197 else
49198 insn_num_uops = (int) path;
49200 /* If the current window is full, get a new window.
49201 Window number zero is full if MAX_INSN uops are scheduled in it.
49202 Window number one is full if window zero's bytes plus window
49203 one's bytes equal 32, if adding the bytes of the new instruction
49204 makes the total greater than 48, or if it already has MAX_INSN
49205 instructions in it. */
49206 num_insn = window_list->num_insn;
49207 num_uops = window_list->num_uops;
49208 window_num = window_list->window_num;
49209 insn_fits = fits_dispatch_window (insn);
49211 if (num_insn >= MAX_INSN
49212 || num_uops + insn_num_uops > MAX_INSN
49213 || !(insn_fits))
49215 window_num = ~window_num & 1;
49216 window_list = allocate_next_window (window_num);
49219 if (window_num == 0)
49221 add_insn_window (insn, window_list, insn_num_uops);
49222 if (window_list->num_insn >= MAX_INSN
49223 && insn_group == disp_branch)
49225 process_end_window ();
49226 return;
49229 else if (window_num == 1)
49231 window0_list = window_list->prev;
49232 sum = window0_list->window_size + window_list->window_size;
49233 if (sum == 32
49234 || (byte_len + sum) >= 48)
49236 process_end_window ();
49237 window_list = dispatch_window_list;
49240 add_insn_window (insn, window_list, insn_num_uops);
49242 else
49243 gcc_unreachable ();
49245 if (is_end_basic_block (insn_group))
49248 /* End of basic block is reached; do end-basic-block processing. */
49248 process_end_window ();
49249 return;
49253 /* Print the dispatch window, WINDOW_NUM, to FILE. */
49255 DEBUG_FUNCTION static void
49256 debug_dispatch_window_file (FILE *file, int window_num)
49258 dispatch_windows *list;
49259 int i;
49261 if (window_num == 0)
49262 list = dispatch_window_list;
49263 else
49264 list = dispatch_window_list1;
49266 fprintf (file, "Window #%d:\n", list->window_num);
49267 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
49268 list->num_insn, list->num_uops, list->window_size);
49269 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
49270 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
49272 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
49273 list->num_stores);
49274 fprintf (file, " insn info:\n");
49276 for (i = 0; i < MAX_INSN; i++)
49278 if (!list->window[i].insn)
49279 break;
49280 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
49281 i, group_name[list->window[i].group],
49282 i, (void *)list->window[i].insn,
49283 i, list->window[i].path,
49284 i, list->window[i].byte_len,
49285 i, list->window[i].imm_bytes);
49289 /* Print to stdout a dispatch window. */
49291 DEBUG_FUNCTION void
49292 debug_dispatch_window (int window_num)
49294 debug_dispatch_window_file (stdout, window_num);
49297 /* Print INSN dispatch information to FILE. */
49299 DEBUG_FUNCTION static void
49300 debug_insn_dispatch_info_file (FILE *file, rtx insn)
49302 int byte_len;
49303 enum insn_path path;
49304 enum dispatch_group group;
49305 int imm_size;
49306 int num_imm_operand;
49307 int num_imm32_operand;
49308 int num_imm64_operand;
49310 if (INSN_CODE (insn) < 0)
49311 return;
49313 byte_len = min_insn_size (insn);
49314 path = get_insn_path (insn);
49315 group = get_insn_group (insn);
49316 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
49317 &num_imm64_operand);
49319 fprintf (file, " insn info:\n");
49320 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
49321 group_name[group], path, byte_len);
49322 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
49323 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
49326 /* Print to stdout the status of the ready list with respect to
49327 dispatch windows. */
49329 DEBUG_FUNCTION void
49330 debug_ready_dispatch (void)
49332 int i;
49333 int no_ready = number_in_ready ();
49335 fprintf (stdout, "Number of ready: %d\n", no_ready);
49337 for (i = 0; i < no_ready; i++)
49338 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
49341 /* This routine is the driver of the dispatch scheduler. */
49343 static void
49344 do_dispatch (rtx insn, int mode)
49346 if (mode == DISPATCH_INIT)
49347 init_dispatch_sched ();
49348 else if (mode == ADD_TO_DISPATCH_WINDOW)
49349 add_to_dispatch_window (insn);
49352 /* Return TRUE if Dispatch Scheduling is supported. */
49354 static bool
49355 has_dispatch (rtx insn, int action)
49357 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
49358 && flag_dispatch_scheduler)
49359 switch (action)
49361 default:
49362 return false;
49364 case IS_DISPATCH_ON:
49365 return true;
49366 break;
49368 case IS_CMP:
49369 return is_cmp (insn);
49371 case DISPATCH_VIOLATION:
49372 return dispatch_violation ();
49374 case FITS_DISPATCH_WINDOW:
49375 return fits_dispatch_window (insn);
49378 return false;
49381 /* Implementation of reassociation_width target hook used by
49382 reassoc phase to identify parallelism level in reassociated
49383 tree. The statement's tree_code is passed in OPC. The arguments'
49384 type is passed in MODE.
49386 Currently parallel reassociation is enabled for Atom
49387 processors only and we set reassociation width to be 2
49388 because Atom may issue up to 2 instructions per cycle.
49390 Return value should be fixed if parallel reassociation is
49391 enabled for other processors. */
49393 static int
49394 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
49395 enum machine_mode mode)
49397 int res = 1;
49399 /* Vector part. */
49400 if (VECTOR_MODE_P (mode))
49402 if (TARGET_VECTOR_PARALLEL_EXECUTION)
49403 return 2;
49404 else
49405 return 1;
49408 /* Scalar part. */
49409 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
49410 res = 2;
49411 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
49412 res = 2;
49414 return res;
49417 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
49418 place emms and femms instructions. */
49420 static enum machine_mode
49421 ix86_preferred_simd_mode (enum machine_mode mode)
49423 if (!TARGET_SSE)
49424 return word_mode;
49426 switch (mode)
49428 case QImode:
49429 return TARGET_AVX512BW ? V64QImode :
49430 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
49431 case HImode:
49432 return TARGET_AVX512BW ? V32HImode :
49433 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
49434 case SImode:
49435 return TARGET_AVX512F ? V16SImode :
49436 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
49437 case DImode:
49438 return TARGET_AVX512F ? V8DImode :
49439 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
49441 case SFmode:
49442 if (TARGET_AVX512F)
49443 return V16SFmode;
49444 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
49445 return V8SFmode;
49446 else
49447 return V4SFmode;
49449 case DFmode:
49450 if (!TARGET_VECTORIZE_DOUBLE)
49451 return word_mode;
49452 else if (TARGET_AVX512F)
49453 return V8DFmode;
49454 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
49455 return V4DFmode;
49456 else if (TARGET_SSE2)
49457 return V2DFmode;
49458 /* FALLTHRU */
49460 default:
49461 return word_mode;
49465 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
49466 vectors. If AVX512F is enabled then try vectorizing with 512bit,
49467 256bit and 128bit vectors. */
49469 static unsigned int
49470 ix86_autovectorize_vector_sizes (void)
49472 return TARGET_AVX512F ? 64 | 32 | 16 :
49473 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
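/* Worked example: with AVX-512F enabled this returns 64 | 32 | 16 (0x70),
   so the vectorizer may try 512-, 256- and 128-bit vectors in turn; with
   only SSE it returns 0 and just the preferred SIMD mode is used.  */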
49478 /* Return class of registers which could be used for pseudo of MODE
49479 and of class RCLASS for spilling instead of memory. Return NO_REGS
49480 if it is not possible or non-profitable. */
49481 static reg_class_t
49482 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
49484 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
49485 && (mode == SImode || (TARGET_64BIT && mode == DImode))
49486 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
49487 return ALL_SSE_REGS;
49488 return NO_REGS;
49491 /* Implement targetm.vectorize.init_cost. */
49493 static void *
49494 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
49496 unsigned *cost = XNEWVEC (unsigned, 3);
49497 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
49498 return cost;
49501 /* Implement targetm.vectorize.add_stmt_cost. */
49503 static unsigned
49504 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
49505 struct _stmt_vec_info *stmt_info, int misalign,
49506 enum vect_cost_model_location where)
49508 unsigned *cost = (unsigned *) data;
49509 unsigned retval = 0;
49511 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
49512 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
49514 /* Statements in an inner loop relative to the loop being
49515 vectorized are weighted more heavily. The value here is
49516 arbitrary and could potentially be improved with analysis. */
49517 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
49518 count *= 50; /* FIXME. */
49520 retval = (unsigned) (count * stmt_cost);
49522 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
49523 for Silvermont as it has an out-of-order integer pipeline and can execute
49524 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
49525 if (TARGET_SILVERMONT || TARGET_INTEL)
49526 if (stmt_info && stmt_info->stmt)
49528 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
49529 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
49530 retval = (retval * 17) / 10;
49533 cost[where] += retval;
49535 return retval;
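/* Worked example: COUNT = 2 and STMT_COST = 3 give RETVAL = 6; on
   Silvermont/Intel a statement with an integer result is then scaled to
   6 * 17 / 10 = 10 before being added to cost[WHERE].  */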
49538 /* Implement targetm.vectorize.finish_cost. */
49540 static void
49541 ix86_finish_cost (void *data, unsigned *prologue_cost,
49542 unsigned *body_cost, unsigned *epilogue_cost)
49544 unsigned *cost = (unsigned *) data;
49545 *prologue_cost = cost[vect_prologue];
49546 *body_cost = cost[vect_body];
49547 *epilogue_cost = cost[vect_epilogue];
49550 /* Implement targetm.vectorize.destroy_cost_data. */
49552 static void
49553 ix86_destroy_cost_data (void *data)
49555 free (data);
49558 /* Validate target specific memory model bits in VAL. */
49560 static unsigned HOST_WIDE_INT
49561 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
49563 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
49564 bool strong;
49566 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
49567 |MEMMODEL_MASK)
49568 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
49570 warning (OPT_Winvalid_memory_model,
49571 "Unknown architecture specific memory model");
49572 return MEMMODEL_SEQ_CST;
49574 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
49575 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
49577 warning (OPT_Winvalid_memory_model,
49578 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
49579 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
49581 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
49583 warning (OPT_Winvalid_memory_model,
49584 "HLE_RELEASE not used with RELEASE or stronger memory model");
49585 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
49587 return val;
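/* Illustrative sketch, not part of i386.c: the HLE bits validated above
   are the ones a caller can OR into an atomic's memory model, e.g. for an
   elided lock acquire:

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;

   Combining __ATOMIC_HLE_ACQUIRE with a weaker model such as
   __ATOMIC_RELAXED triggers the warning above and falls back to
   MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE.  */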
49590 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
49591 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
49592 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
49593 or number of vecsize_mangle variants that should be emitted. */
49595 static int
49596 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
49597 struct cgraph_simd_clone *clonei,
49598 tree base_type, int num)
49600 int ret = 1;
49602 if (clonei->simdlen
49603 && (clonei->simdlen < 2
49604 || clonei->simdlen > 16
49605 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
49607 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
49608 "unsupported simdlen %d", clonei->simdlen);
49609 return 0;
49612 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
49613 if (TREE_CODE (ret_type) != VOID_TYPE)
49614 switch (TYPE_MODE (ret_type))
49616 case QImode:
49617 case HImode:
49618 case SImode:
49619 case DImode:
49620 case SFmode:
49621 case DFmode:
49622 /* case SCmode: */
49623 /* case DCmode: */
49624 break;
49625 default:
49626 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
49627 "unsupported return type %qT for simd\n", ret_type);
49628 return 0;
49631 tree t;
49632 int i;
49634 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
49635 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
49636 switch (TYPE_MODE (TREE_TYPE (t)))
49638 case QImode:
49639 case HImode:
49640 case SImode:
49641 case DImode:
49642 case SFmode:
49643 case DFmode:
49644 /* case SCmode: */
49645 /* case DCmode: */
49646 break;
49647 default:
49648 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
49649 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
49650 return 0;
49653 if (clonei->cilk_elemental)
49655 /* Parse the processor clause here. If not present, default to 'b'. */
49656 clonei->vecsize_mangle = 'b';
49658 else if (!TREE_PUBLIC (node->decl))
49660 /* If the function isn't exported, we can pick up just one ISA
49661 for the clones. */
49662 if (TARGET_AVX2)
49663 clonei->vecsize_mangle = 'd';
49664 else if (TARGET_AVX)
49665 clonei->vecsize_mangle = 'c';
49666 else
49667 clonei->vecsize_mangle = 'b';
49668 ret = 1;
49670 else
49672 clonei->vecsize_mangle = "bcd"[num];
49673 ret = 3;
49675 switch (clonei->vecsize_mangle)
49677 case 'b':
49678 clonei->vecsize_int = 128;
49679 clonei->vecsize_float = 128;
49680 break;
49681 case 'c':
49682 clonei->vecsize_int = 128;
49683 clonei->vecsize_float = 256;
49684 break;
49685 case 'd':
49686 clonei->vecsize_int = 256;
49687 clonei->vecsize_float = 256;
49688 break;
49690 if (clonei->simdlen == 0)
49692 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
49693 clonei->simdlen = clonei->vecsize_int;
49694 else
49695 clonei->simdlen = clonei->vecsize_float;
49696 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
49697 if (clonei->simdlen > 16)
49698 clonei->simdlen = 16;
49700 return ret;
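/* Worked example: for an exported clone with BASE_TYPE float and
   vecsize_mangle 'c' (AVX), vecsize_float is 256, so simdlen defaults
   to 256 / 32 = 8 lanes; for 'b' (SSE2) it would be 128 / 32 = 4.  */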
49703 /* Add target attribute to SIMD clone NODE if needed. */
49705 static void
49706 ix86_simd_clone_adjust (struct cgraph_node *node)
49708 const char *str = NULL;
49709 gcc_assert (node->decl == cfun->decl);
49710 switch (node->simdclone->vecsize_mangle)
49712 case 'b':
49713 if (!TARGET_SSE2)
49714 str = "sse2";
49715 break;
49716 case 'c':
49717 if (!TARGET_AVX)
49718 str = "avx";
49719 break;
49720 case 'd':
49721 if (!TARGET_AVX2)
49722 str = "avx2";
49723 break;
49724 default:
49725 gcc_unreachable ();
49727 if (str == NULL)
49728 return;
49729 push_cfun (NULL);
49730 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
49731 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
49732 gcc_assert (ok);
49733 pop_cfun ();
49734 ix86_previous_fndecl = NULL_TREE;
49735 ix86_set_current_function (node->decl);
49738 /* If SIMD clone NODE can't be used in a vectorized loop
49739 in current function, return -1, otherwise return a badness of using it
49740 (0 if it is most desirable from vecsize_mangle point of view, 1
49741 slightly less desirable, etc.). */
49743 static int
49744 ix86_simd_clone_usable (struct cgraph_node *node)
49746 switch (node->simdclone->vecsize_mangle)
49748 case 'b':
49749 if (!TARGET_SSE2)
49750 return -1;
49751 if (!TARGET_AVX)
49752 return 0;
49753 return TARGET_AVX2 ? 2 : 1;
49754 case 'c':
49755 if (!TARGET_AVX)
49756 return -1;
49757 return TARGET_AVX2 ? 1 : 0;
49758 break;
49759 case 'd':
49760 if (!TARGET_AVX2)
49761 return -1;
49762 return 0;
49763 default:
49764 gcc_unreachable ();
49768 /* This function counts the number of memory references.
49769 This value determines the unrolling factor for
49770 bdver3 and bdver4 architectures. */
49772 static int
49773 ix86_loop_memcount (rtx *x, unsigned *mem_count)
49775 if (*x != NULL_RTX && MEM_P (*x))
49777 enum machine_mode mode;
49778 unsigned int n_words;
49780 mode = GET_MODE (*x);
49781 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
49783 if (n_words > 4)
49784 (*mem_count)+=2;
49785 else
49786 (*mem_count)+=1;
49788 return 0;
49791 /* This function adjusts the unroll factor based on
49792 the hardware capabilities. For example, bdver3 has
49793 a loop buffer which makes unrolling of smaller
49794 loops less important. This function decides the
49795 unroll factor using the number of memory references
49796 (the value 32 is used) as a heuristic. */
49798 static unsigned
49799 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
49801 basic_block *bbs;
49802 rtx insn;
49803 unsigned i;
49804 unsigned mem_count = 0;
49806 if (!TARGET_ADJUST_UNROLL)
49807 return nunroll;
49809 /* Count the number of memory references within the loop body. */
49810 bbs = get_loop_body (loop);
49811 for (i = 0; i < loop->num_nodes; i++)
49813 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
49814 if (NONDEBUG_INSN_P (insn))
49815 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
49817 free (bbs);
49819 if (mem_count && mem_count <=32)
49820 return 32/mem_count;
49822 return nunroll;
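/* Worked example: a loop body with 8 counted memory references gets an
   unroll factor of 32 / 8 = 4; if more than 32 references are counted,
   the requested NUNROLL is returned unchanged.  */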
49826 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
49828 static bool
49829 ix86_float_exceptions_rounding_supported_p (void)
49831 /* For x87 floating point with standard excess precision handling,
49832 there is no adddf3 pattern (since x87 floating point only has
49833 XFmode operations) so the default hook implementation gets this
49834 wrong. */
49835 return TARGET_80387 || TARGET_SSE_MATH;
49838 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
49840 static void
49841 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
49843 if (!TARGET_80387 && !TARGET_SSE_MATH)
49844 return;
49845 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
49846 if (TARGET_80387)
49848 tree fenv_index_type = build_index_type (size_int (6));
49849 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
49850 tree fenv_var = create_tmp_var (fenv_type, NULL);
49851 mark_addressable (fenv_var);
49852 tree fenv_ptr = build_pointer_type (fenv_type);
49853 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
49854 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
49855 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
49856 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
49857 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
49858 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
49859 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
49860 tree hold_fnclex = build_call_expr (fnclex, 0);
49861 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
49862 hold_fnclex);
49863 *clear = build_call_expr (fnclex, 0);
49864 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
49865 tree fnstsw_call = build_call_expr (fnstsw, 0);
49866 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
49867 sw_var, fnstsw_call);
49868 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
49869 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
49870 exceptions_var, exceptions_x87);
49871 *update = build2 (COMPOUND_EXPR, integer_type_node,
49872 sw_mod, update_mod);
49873 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
49874 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
49876 if (TARGET_SSE_MATH)
49878 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
49879 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
49880 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
49881 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
49882 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
49883 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
49884 mxcsr_orig_var, stmxcsr_hold_call);
49885 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
49886 mxcsr_orig_var,
49887 build_int_cst (unsigned_type_node, 0x1f80));
49888 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
49889 build_int_cst (unsigned_type_node, 0xffffffc0));
49890 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
49891 mxcsr_mod_var, hold_mod_val);
49892 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
49893 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
49894 hold_assign_orig, hold_assign_mod);
49895 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
49896 ldmxcsr_hold_call);
49897 if (*hold)
49898 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
49899 else
49900 *hold = hold_all;
49901 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
49902 if (*clear)
49903 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
49904 ldmxcsr_clear_call);
49905 else
49906 *clear = ldmxcsr_clear_call;
49907 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
49908 tree exceptions_sse = fold_convert (integer_type_node,
49909 stxmcsr_update_call);
49910 if (*update)
49912 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
49913 exceptions_var, exceptions_sse);
49914 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
49915 exceptions_var, exceptions_mod);
49916 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
49917 exceptions_assign);
49919 else
49920 *update = build2 (MODIFY_EXPR, integer_type_node,
49921 exceptions_var, exceptions_sse);
49922 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
49923 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
49924 ldmxcsr_update_call);
49926 tree atomic_feraiseexcept
49927 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
49928 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
49929 1, exceptions_var);
49930 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
49931 atomic_feraiseexcept_call);
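/* Illustrative sketch, not part of i386.c (the helper name is made up):
   the SSE half of the "hold" sequence built above corresponds roughly to
   the MXCSR manipulation below, using the xmmintrin.h intrinsics.  */
#include <xmmintrin.h>

static unsigned int
hold_mxcsr (void)
{
  unsigned int orig = _mm_getcsr ();
  /* Mask all exceptions (OR in 0x1f80) and clear the sticky exception
     flags (AND out the low six bits) for the protected region.  */
  _mm_setcsr ((orig | 0x1f80) & 0xffffffc0);
  return orig;      /* restored by the "update" sequence */
}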
49934 /* Initialize the GCC target structure. */
49935 #undef TARGET_RETURN_IN_MEMORY
49936 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
49938 #undef TARGET_LEGITIMIZE_ADDRESS
49939 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
49941 #undef TARGET_ATTRIBUTE_TABLE
49942 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
49943 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
49944 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
49945 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
49946 # undef TARGET_MERGE_DECL_ATTRIBUTES
49947 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
49948 #endif
49950 #undef TARGET_COMP_TYPE_ATTRIBUTES
49951 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
49953 #undef TARGET_INIT_BUILTINS
49954 #define TARGET_INIT_BUILTINS ix86_init_builtins
49955 #undef TARGET_BUILTIN_DECL
49956 #define TARGET_BUILTIN_DECL ix86_builtin_decl
49957 #undef TARGET_EXPAND_BUILTIN
49958 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
49960 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
49961 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
49962 ix86_builtin_vectorized_function
49964 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
49965 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
49967 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
49968 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
49970 #undef TARGET_VECTORIZE_BUILTIN_GATHER
49971 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
49973 #undef TARGET_BUILTIN_RECIPROCAL
49974 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
49976 #undef TARGET_ASM_FUNCTION_EPILOGUE
49977 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
49979 #undef TARGET_ENCODE_SECTION_INFO
49980 #ifndef SUBTARGET_ENCODE_SECTION_INFO
49981 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
49982 #else
49983 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
49984 #endif
49986 #undef TARGET_ASM_OPEN_PAREN
49987 #define TARGET_ASM_OPEN_PAREN ""
49988 #undef TARGET_ASM_CLOSE_PAREN
49989 #define TARGET_ASM_CLOSE_PAREN ""
49991 #undef TARGET_ASM_BYTE_OP
49992 #define TARGET_ASM_BYTE_OP ASM_BYTE
49994 #undef TARGET_ASM_ALIGNED_HI_OP
49995 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
49996 #undef TARGET_ASM_ALIGNED_SI_OP
49997 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
49998 #ifdef ASM_QUAD
49999 #undef TARGET_ASM_ALIGNED_DI_OP
50000 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
50001 #endif
50003 #undef TARGET_PROFILE_BEFORE_PROLOGUE
50004 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
50006 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
50007 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
50009 #undef TARGET_ASM_UNALIGNED_HI_OP
50010 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
50011 #undef TARGET_ASM_UNALIGNED_SI_OP
50012 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
50013 #undef TARGET_ASM_UNALIGNED_DI_OP
50014 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
50016 #undef TARGET_PRINT_OPERAND
50017 #define TARGET_PRINT_OPERAND ix86_print_operand
50018 #undef TARGET_PRINT_OPERAND_ADDRESS
50019 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
50020 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
50021 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
50022 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
50023 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
50025 #undef TARGET_SCHED_INIT_GLOBAL
50026 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
50027 #undef TARGET_SCHED_ADJUST_COST
50028 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
50029 #undef TARGET_SCHED_ISSUE_RATE
50030 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
50031 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
50032 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
50033 ia32_multipass_dfa_lookahead
50034 #undef TARGET_SCHED_MACRO_FUSION_P
50035 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
50036 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
50037 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
50039 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
50040 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
50042 #undef TARGET_MEMMODEL_CHECK
50043 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
50045 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
50046 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
50048 #ifdef HAVE_AS_TLS
50049 #undef TARGET_HAVE_TLS
50050 #define TARGET_HAVE_TLS true
50051 #endif
50052 #undef TARGET_CANNOT_FORCE_CONST_MEM
50053 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
50054 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
50055 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
50057 #undef TARGET_DELEGITIMIZE_ADDRESS
50058 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
50060 #undef TARGET_MS_BITFIELD_LAYOUT_P
50061 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
50063 #if TARGET_MACHO
50064 #undef TARGET_BINDS_LOCAL_P
50065 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
50066 #endif
50067 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
50068 #undef TARGET_BINDS_LOCAL_P
50069 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
50070 #endif
50072 #undef TARGET_ASM_OUTPUT_MI_THUNK
50073 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
50074 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
50075 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
50077 #undef TARGET_ASM_FILE_START
50078 #define TARGET_ASM_FILE_START x86_file_start
50080 #undef TARGET_OPTION_OVERRIDE
50081 #define TARGET_OPTION_OVERRIDE ix86_option_override
50083 #undef TARGET_REGISTER_MOVE_COST
50084 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
50085 #undef TARGET_MEMORY_MOVE_COST
50086 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
50087 #undef TARGET_RTX_COSTS
50088 #define TARGET_RTX_COSTS ix86_rtx_costs
50089 #undef TARGET_ADDRESS_COST
50090 #define TARGET_ADDRESS_COST ix86_address_cost
50092 #undef TARGET_FIXED_CONDITION_CODE_REGS
50093 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
50094 #undef TARGET_CC_MODES_COMPATIBLE
50095 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
50097 #undef TARGET_MACHINE_DEPENDENT_REORG
50098 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
50100 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
50101 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
50103 #undef TARGET_BUILD_BUILTIN_VA_LIST
50104 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
50106 #undef TARGET_FOLD_BUILTIN
50107 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
50109 #undef TARGET_COMPARE_VERSION_PRIORITY
50110 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
50112 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
50113 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
50114 ix86_generate_version_dispatcher_body
50116 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
50117 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
50118 ix86_get_function_versions_dispatcher
50120 #undef TARGET_ENUM_VA_LIST_P
50121 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
50123 #undef TARGET_FN_ABI_VA_LIST
50124 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
50126 #undef TARGET_CANONICAL_VA_LIST_TYPE
50127 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
50129 #undef TARGET_EXPAND_BUILTIN_VA_START
50130 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
50132 #undef TARGET_MD_ASM_CLOBBERS
50133 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
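
/* Mode support queries.  */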
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
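
/* Function return value handling.  */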
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
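
/* Register class and reload hooks.  */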
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
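
/* Vectorizer cost model and capability hooks.  */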
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
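
/* Per-function target option handling ("target" attribute and function versions).  */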
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
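
/* SIMD clone hooks.  */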
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p
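
/* Mode switching hooks.  */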
#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
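
/* The x86 target structure, assembled from the hook macros defined above.  */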
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"